1 //=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements the WebAssemblyTargetLowering class.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "WebAssemblyISelLowering.h"
15 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
16 #include "Utils/WebAssemblyUtilities.h"
17 #include "WebAssemblyMachineFunctionInfo.h"
18 #include "WebAssemblySubtarget.h"
19 #include "WebAssemblyTargetMachine.h"
20 #include "llvm/CodeGen/CallingConvLower.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/CodeGen/MachineJumpTableInfo.h"
23 #include "llvm/CodeGen/MachineModuleInfo.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/CodeGen/SelectionDAG.h"
26 #include "llvm/CodeGen/SelectionDAGNodes.h"
27 #include "llvm/CodeGen/WasmEHFuncInfo.h"
28 #include "llvm/IR/DiagnosticInfo.h"
29 #include "llvm/IR/DiagnosticPrinter.h"
30 #include "llvm/IR/Function.h"
31 #include "llvm/IR/Intrinsics.h"
32 #include "llvm/IR/IntrinsicsWebAssembly.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/MathExtras.h"
36 #include "llvm/Support/raw_ostream.h"
37 #include "llvm/Target/TargetOptions.h"
38 using namespace llvm;
39 
40 #define DEBUG_TYPE "wasm-lower"
41 
42 WebAssemblyTargetLowering::WebAssemblyTargetLowering(
43     const TargetMachine &TM, const WebAssemblySubtarget &STI)
44     : TargetLowering(TM), Subtarget(&STI) {
45   auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;
46 
47   // Booleans always contain 0 or 1.
48   setBooleanContents(ZeroOrOneBooleanContent);
  // Except in SIMD vectors, where booleans are all-zeros or all-ones.
50   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
51   // We don't know the microarchitecture here, so just reduce register pressure.
52   setSchedulingPreference(Sched::RegPressure);
53   // Tell ISel that we have a stack pointer.
54   setStackPointerRegisterToSaveRestore(
55       Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
56   // Set up the register classes.
57   addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
58   addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
59   addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
60   addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
61   if (Subtarget->hasSIMD128()) {
62     addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
63     addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
64     addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
65     addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
66     addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
67     addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
68   }
69   // Compute derived properties from the register classes.
70   computeRegisterProperties(Subtarget->getRegisterInfo());
71 
72   // Transform loads and stores to pointers in address space 1 to loads and
73   // stores to WebAssembly global variables, outside linear memory.
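  // For example, a load through an i32 addrspace(1)* pointer becomes a
  // global.get of the corresponding Wasm global rather than a linear-memory
  // load (see LowerLoad and LowerStore below).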
74   for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) {
75     setOperationAction(ISD::LOAD, T, Custom);
76     setOperationAction(ISD::STORE, T, Custom);
77   }
78   if (Subtarget->hasSIMD128()) {
79     for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
80                    MVT::v2f64}) {
81       setOperationAction(ISD::LOAD, T, Custom);
82       setOperationAction(ISD::STORE, T, Custom);
83     }
84   }
85 
86   setOperationAction(ISD::GlobalAddress, MVTPtr, Custom);
87   setOperationAction(ISD::GlobalTLSAddress, MVTPtr, Custom);
88   setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom);
89   setOperationAction(ISD::JumpTable, MVTPtr, Custom);
90   setOperationAction(ISD::BlockAddress, MVTPtr, Custom);
91   setOperationAction(ISD::BRIND, MVT::Other, Custom);
92 
  // Take the default expansion for va_arg, va_copy, and va_end. There is no
  // default action for va_start, so we lower it with custom code.
95   setOperationAction(ISD::VASTART, MVT::Other, Custom);
96   setOperationAction(ISD::VAARG, MVT::Other, Expand);
97   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
98   setOperationAction(ISD::VAEND, MVT::Other, Expand);
99 
100   for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
101     // Don't expand the floating-point types to constant pools.
102     setOperationAction(ISD::ConstantFP, T, Legal);
103     // Expand floating-point comparisons.
104     for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
105                     ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
106       setCondCodeAction(CC, T, Expand);
107     // Expand floating-point library function operators.
108     for (auto Op :
109          {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FMA})
110       setOperationAction(Op, T, Expand);
    // Mark supported floating-point library function operators as Legal; they
    // otherwise default to expand.
113     for (auto Op :
114          {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT})
115       setOperationAction(Op, T, Legal);
116     // Support minimum and maximum, which otherwise default to expand.
117     setOperationAction(ISD::FMINIMUM, T, Legal);
118     setOperationAction(ISD::FMAXIMUM, T, Legal);
119     // WebAssembly currently has no builtin f16 support.
120     setOperationAction(ISD::FP16_TO_FP, T, Expand);
121     setOperationAction(ISD::FP_TO_FP16, T, Expand);
122     setLoadExtAction(ISD::EXTLOAD, T, MVT::f16, Expand);
123     setTruncStoreAction(T, MVT::f16, Expand);
124   }
125 
126   // Expand unavailable integer operations.
127   for (auto Op :
128        {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU,
129         ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS, ISD::SRA_PARTS,
130         ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}) {
131     for (auto T : {MVT::i32, MVT::i64})
132       setOperationAction(Op, T, Expand);
133     if (Subtarget->hasSIMD128())
134       for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
135         setOperationAction(Op, T, Expand);
136   }
137 
138   if (Subtarget->hasNontrappingFPToInt())
139     for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT})
140       for (auto T : {MVT::i32, MVT::i64})
141         setOperationAction(Op, T, Custom);
142 
143   // SIMD-specific configuration
144   if (Subtarget->hasSIMD128()) {
145     // Hoist bitcasts out of shuffles
146     setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
147 
148     // Combine extends of extract_subvectors into widening ops
149     setTargetDAGCombine(ISD::SIGN_EXTEND);
150     setTargetDAGCombine(ISD::ZERO_EXTEND);
151 
    // Combine int_to_fp or fp_extend of extract_vectors and vice versa into
    // conversion ops
154     setTargetDAGCombine(ISD::SINT_TO_FP);
155     setTargetDAGCombine(ISD::UINT_TO_FP);
156     setTargetDAGCombine(ISD::FP_EXTEND);
157     setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
158 
159     // Combine concat of {s,u}int_to_fp_sat to i32x4.trunc_sat_f64x2_zero_{s,u}
160     setTargetDAGCombine(ISD::CONCAT_VECTORS);
161 
162     // Support saturating add for i8x16 and i16x8
163     for (auto Op : {ISD::SADDSAT, ISD::UADDSAT})
164       for (auto T : {MVT::v16i8, MVT::v8i16})
165         setOperationAction(Op, T, Legal);
166 
167     // Support integer abs
168     for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
169       setOperationAction(ISD::ABS, T, Legal);
170 
171     // Custom lower BUILD_VECTORs to minimize number of replace_lanes
172     for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
173                    MVT::v2f64})
174       setOperationAction(ISD::BUILD_VECTOR, T, Custom);
175 
176     // We have custom shuffle lowering to expose the shuffle mask
177     for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
178                    MVT::v2f64})
179       setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);
180 
181     // Custom lowering since wasm shifts must have a scalar shift amount
182     for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
183       for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
184         setOperationAction(Op, T, Custom);
185 
186     // Custom lower lane accesses to expand out variable indices
187     for (auto Op : {ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT})
188       for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
189                      MVT::v2f64})
190         setOperationAction(Op, T, Custom);
191 
192     // There is no i8x16.mul instruction
193     setOperationAction(ISD::MUL, MVT::v16i8, Expand);
194 
195     // There is no vector conditional select instruction
196     for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
197                    MVT::v2f64})
198       setOperationAction(ISD::SELECT_CC, T, Expand);
199 
200     // Expand integer operations supported for scalars but not SIMD
201     for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP, ISD::SDIV, ISD::UDIV,
202                     ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR})
203       for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
204         setOperationAction(Op, T, Expand);
205 
206     // But we do have integer min and max operations
207     for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
208       for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
209         setOperationAction(Op, T, Legal);
210 
211     // Expand float operations supported for scalars but not SIMD
212     for (auto Op : {ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
213                     ISD::FEXP, ISD::FEXP2, ISD::FRINT})
214       for (auto T : {MVT::v4f32, MVT::v2f64})
215         setOperationAction(Op, T, Expand);
216 
217     // Unsigned comparison operations are unavailable for i64x2 vectors.
218     for (auto CC : {ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE})
219       setCondCodeAction(CC, MVT::v2i64, Custom);
220 
221     // 64x2 conversions are not in the spec
222     for (auto Op :
223          {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT})
224       for (auto T : {MVT::v2i64, MVT::v2f64})
225         setOperationAction(Op, T, Expand);
226 
    // But saturating fp_to_int conversions are
228     for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT})
229       setOperationAction(Op, MVT::v4i32, Custom);
230   }
231 
232   // As a special case, these operators use the type to mean the type to
233   // sign-extend from.
234   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
235   if (!Subtarget->hasSignExt()) {
236     // Sign extends are legal only when extending a vector extract
237     auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
238     for (auto T : {MVT::i8, MVT::i16, MVT::i32})
239       setOperationAction(ISD::SIGN_EXTEND_INREG, T, Action);
240   }
241   for (auto T : MVT::integer_fixedlen_vector_valuetypes())
242     setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand);
243 
244   // Dynamic stack allocation: use the default expansion.
245   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
246   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
247   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand);
248 
249   setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
250   setOperationAction(ISD::FrameIndex, MVT::i64, Custom);
251   setOperationAction(ISD::CopyToReg, MVT::Other, Custom);
252 
253   // Expand these forms; we pattern-match the forms that we can handle in isel.
254   for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
255     for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
256       setOperationAction(Op, T, Expand);
257 
258   // We have custom switch handling.
259   setOperationAction(ISD::BR_JT, MVT::Other, Custom);
260 
261   // WebAssembly doesn't have:
262   //  - Floating-point extending loads.
263   //  - Floating-point truncating stores.
264   //  - i1 extending loads.
  //  - Truncating SIMD stores and most extending loads.
266   setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
267   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
268   for (auto T : MVT::integer_valuetypes())
269     for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
270       setLoadExtAction(Ext, T, MVT::i1, Promote);
271   if (Subtarget->hasSIMD128()) {
272     for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
273                    MVT::v2f64}) {
274       for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
275         if (MVT(T) != MemT) {
276           setTruncStoreAction(T, MemT, Expand);
277           for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
278             setLoadExtAction(Ext, T, MemT, Expand);
279         }
280       }
281     }
282     // But some vector extending loads are legal
283     for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
284       setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
285       setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
286       setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
287     }
288     // And some truncating stores are legal as well
289     setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
290     setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
291   }
292 
293   // Don't do anything clever with build_pairs
294   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
295 
296   // Trap lowers to wasm unreachable
297   setOperationAction(ISD::TRAP, MVT::Other, Legal);
298   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
299 
300   // Exception handling intrinsics
301   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
302   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
303   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
304 
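  // Atomic accesses wider than 64 bits are not supported natively and are
  // expanded to libcalls.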
305   setMaxAtomicSizeInBitsSupported(64);
306 
307   // Override the __gnu_f2h_ieee/__gnu_h2f_ieee names so that the f32 name is
308   // consistent with the f64 and f128 names.
309   setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
310   setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
311 
  // Define the Emscripten name for the return-address helper.
313   // TODO: when implementing other Wasm backends, make this generic or only do
314   // this on emscripten depending on what they end up doing.
315   setLibcallName(RTLIB::RETURN_ADDRESS, "emscripten_return_address");
316 
317   // Always convert switches to br_tables unless there is only one case, which
318   // is equivalent to a simple branch. This reduces code size for wasm, and we
319   // defer possible jump table optimizations to the VM.
320   setMinimumJumpTableEntries(2);
321 }
322 
323 TargetLowering::AtomicExpansionKind
324 WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
325   // We have wasm instructions for these
326   switch (AI->getOperation()) {
327   case AtomicRMWInst::Add:
328   case AtomicRMWInst::Sub:
329   case AtomicRMWInst::And:
330   case AtomicRMWInst::Or:
331   case AtomicRMWInst::Xor:
332   case AtomicRMWInst::Xchg:
333     return AtomicExpansionKind::None;
334   default:
335     break;
336   }
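  // Everything else (e.g. nand, min, max) has no single Wasm instruction and
  // is expanded to a cmpxchg loop.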
337   return AtomicExpansionKind::CmpXChg;
338 }
339 
340 bool WebAssemblyTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
341   // Implementation copied from X86TargetLowering.
342   unsigned Opc = VecOp.getOpcode();
343 
344   // Assume target opcodes can't be scalarized.
345   // TODO - do we have any exceptions?
346   if (Opc >= ISD::BUILTIN_OP_END)
347     return false;
348 
349   // If the vector op is not supported, try to convert to scalar.
350   EVT VecVT = VecOp.getValueType();
351   if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
352     return true;
353 
354   // If the vector op is supported, but the scalar op is not, the transform may
355   // not be worthwhile.
356   EVT ScalarVT = VecVT.getScalarType();
357   return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
358 }
359 
360 FastISel *WebAssemblyTargetLowering::createFastISel(
361     FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
362   return WebAssembly::createFastISel(FuncInfo, LibInfo);
363 }
364 
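// Return the type to use for a scalar shift amount: the shift width rounded up
// to a power of two (at least 8 bits). Shifts wider than 64 bits become
// libcalls, which expect an i32 count.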
365 MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
366                                                       EVT VT) const {
367   unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
368   if (BitWidth > 1 && BitWidth < 8)
369     BitWidth = 8;
370 
371   if (BitWidth > 64) {
372     // The shift will be lowered to a libcall, and compiler-rt libcalls expect
373     // the count to be an i32.
374     BitWidth = 32;
375     assert(BitWidth >= Log2_32_Ceil(VT.getSizeInBits()) &&
376            "32-bit shift counts ought to be enough for anyone");
377   }
378 
379   MVT Result = MVT::getIntegerVT(BitWidth);
380   assert(Result != MVT::INVALID_SIMPLE_VALUE_TYPE &&
381          "Unable to represent scalar shift amount type");
382   return Result;
383 }
384 
385 // Lower an fp-to-int conversion operator from the LLVM opcode, which has an
386 // undefined result on invalid/overflow, to the WebAssembly opcode, which
387 // traps on invalid/overflow.
388 static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
389                                        MachineBasicBlock *BB,
390                                        const TargetInstrInfo &TII,
391                                        bool IsUnsigned, bool Int64,
392                                        bool Float64, unsigned LoweredOpcode) {
393   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
394 
395   Register OutReg = MI.getOperand(0).getReg();
396   Register InReg = MI.getOperand(1).getReg();
397 
398   unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
399   unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
400   unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
401   unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
402   unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
403   unsigned Eqz = WebAssembly::EQZ_I32;
404   unsigned And = WebAssembly::AND_I32;
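  // The conversion is performed only when the input is in range: for signed
  // conversions the check below is |x| < 2^31 (or 2^63), and for unsigned ones
  // it is 0 <= x < 2^32 (or 2^64). Out-of-range inputs produce Substitute
  // instead, because the Wasm trunc instructions trap where the LLVM operation
  // merely has an undefined result.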
405   int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
406   int64_t Substitute = IsUnsigned ? 0 : Limit;
407   double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
408   auto &Context = BB->getParent()->getFunction().getContext();
409   Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);
410 
411   const BasicBlock *LLVMBB = BB->getBasicBlock();
412   MachineFunction *F = BB->getParent();
413   MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
414   MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB);
415   MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
416 
417   MachineFunction::iterator It = ++BB->getIterator();
418   F->insert(It, FalseMBB);
419   F->insert(It, TrueMBB);
420   F->insert(It, DoneMBB);
421 
422   // Transfer the remainder of BB and its successor edges to DoneMBB.
423   DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
424   DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
425 
426   BB->addSuccessor(TrueMBB);
427   BB->addSuccessor(FalseMBB);
428   TrueMBB->addSuccessor(DoneMBB);
429   FalseMBB->addSuccessor(DoneMBB);
430 
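  // Temporaries for the range check: Tmp0/Tmp1 hold the (possibly abs'd) input
  // and the comparison constant, CmpReg/EqzReg hold the i32 comparison
  // results, and FalseReg/TrueReg feed the final PHI.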
431   unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
432   Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
433   Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
434   CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
435   EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
436   FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
437   TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
438 
439   MI.eraseFromParent();
440   // For signed numbers, we can do a single comparison to determine whether
441   // fabs(x) is within range.
442   if (IsUnsigned) {
443     Tmp0 = InReg;
444   } else {
445     BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
446   }
447   BuildMI(BB, DL, TII.get(FConst), Tmp1)
448       .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
449   BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);
450 
451   // For unsigned numbers, we have to do a separate comparison with zero.
452   if (IsUnsigned) {
453     Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
454     Register SecondCmpReg =
455         MRI.createVirtualRegister(&WebAssembly::I32RegClass);
456     Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
457     BuildMI(BB, DL, TII.get(FConst), Tmp1)
458         .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
459     BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
460     BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
461     CmpReg = AndReg;
462   }
463 
464   BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);
465 
466   // Create the CFG diamond to select between doing the conversion or using
467   // the substitute value.
468   BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
469   BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
470   BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
471   BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
472   BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
473       .addReg(FalseReg)
474       .addMBB(FalseMBB)
475       .addReg(TrueReg)
476       .addMBB(TrueMBB);
477 
478   return DoneMBB;
479 }
480 
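// Combine the CALL_PARAMS and CALL_RESULTS (or RET_CALL_RESULTS) pseudo
// instructions produced by instruction selection into a single CALL, RET_CALL,
// CALL_INDIRECT, or RET_CALL_INDIRECT machine instruction carrying both the
// results and the arguments.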
481 static MachineBasicBlock *
482 LowerCallResults(MachineInstr &CallResults, DebugLoc DL, MachineBasicBlock *BB,
483                  const WebAssemblySubtarget *Subtarget,
484                  const TargetInstrInfo &TII) {
485   MachineInstr &CallParams = *CallResults.getPrevNode();
486   assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS);
487   assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS ||
488          CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS);
489 
490   bool IsIndirect = CallParams.getOperand(0).isReg();
491   bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS;
492 
493   unsigned CallOp;
494   if (IsIndirect && IsRetCall) {
495     CallOp = WebAssembly::RET_CALL_INDIRECT;
496   } else if (IsIndirect) {
497     CallOp = WebAssembly::CALL_INDIRECT;
498   } else if (IsRetCall) {
499     CallOp = WebAssembly::RET_CALL;
500   } else {
501     CallOp = WebAssembly::CALL;
502   }
503 
504   MachineFunction &MF = *BB->getParent();
505   const MCInstrDesc &MCID = TII.get(CallOp);
506   MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));
507 
508   // See if we must truncate the function pointer.
509   // CALL_INDIRECT takes an i32, but in wasm64 we represent function pointers
510   // as 64-bit for uniformity with other pointer types.
511   // See also: WebAssemblyFastISel::selectCall
512   if (IsIndirect && MF.getSubtarget<WebAssemblySubtarget>().hasAddr64()) {
513     Register Reg32 =
514         MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
515     auto &FnPtr = CallParams.getOperand(0);
516     BuildMI(*BB, CallResults.getIterator(), DL,
517             TII.get(WebAssembly::I32_WRAP_I64), Reg32)
518         .addReg(FnPtr.getReg());
519     FnPtr.setReg(Reg32);
520   }
521 
522   // Move the function pointer to the end of the arguments for indirect calls
523   if (IsIndirect) {
524     auto FnPtr = CallParams.getOperand(0);
525     CallParams.RemoveOperand(0);
526     CallParams.addOperand(FnPtr);
527   }
528 
529   for (auto Def : CallResults.defs())
530     MIB.add(Def);
531 
532   if (IsIndirect) {
533     // Placeholder for the type index.
534     MIB.addImm(0);
535     // The table into which this call_indirect indexes.
536     MCSymbolWasm *Table =
537         WebAssembly::getOrCreateFunctionTableSymbol(MF.getContext(), Subtarget);
538     if (Subtarget->hasReferenceTypes()) {
539       MIB.addSym(Table);
540     } else {
541       // For the MVP there is at most one table whose number is 0, but we can't
542       // write a table symbol or issue relocations.  Instead we just ensure the
543       // table is live and write a zero.
544       Table->setNoStrip();
545       MIB.addImm(0);
546     }
547   }
548 
549   for (auto Use : CallParams.uses())
550     MIB.add(Use);
551 
552   BB->insert(CallResults.getIterator(), MIB);
553   CallParams.eraseFromParent();
554   CallResults.eraseFromParent();
555 
556   return BB;
557 }
558 
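// Expand the pseudo instructions that requested a custom inserter: the checked
// FP-to-int conversions (via LowerFPToInt) and the call pseudos (via
// LowerCallResults).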
559 MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
560     MachineInstr &MI, MachineBasicBlock *BB) const {
561   const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
562   DebugLoc DL = MI.getDebugLoc();
563 
564   switch (MI.getOpcode()) {
565   default:
566     llvm_unreachable("Unexpected instr type to insert");
567   case WebAssembly::FP_TO_SINT_I32_F32:
568     return LowerFPToInt(MI, DL, BB, TII, false, false, false,
569                         WebAssembly::I32_TRUNC_S_F32);
570   case WebAssembly::FP_TO_UINT_I32_F32:
571     return LowerFPToInt(MI, DL, BB, TII, true, false, false,
572                         WebAssembly::I32_TRUNC_U_F32);
573   case WebAssembly::FP_TO_SINT_I64_F32:
574     return LowerFPToInt(MI, DL, BB, TII, false, true, false,
575                         WebAssembly::I64_TRUNC_S_F32);
576   case WebAssembly::FP_TO_UINT_I64_F32:
577     return LowerFPToInt(MI, DL, BB, TII, true, true, false,
578                         WebAssembly::I64_TRUNC_U_F32);
579   case WebAssembly::FP_TO_SINT_I32_F64:
580     return LowerFPToInt(MI, DL, BB, TII, false, false, true,
581                         WebAssembly::I32_TRUNC_S_F64);
582   case WebAssembly::FP_TO_UINT_I32_F64:
583     return LowerFPToInt(MI, DL, BB, TII, true, false, true,
584                         WebAssembly::I32_TRUNC_U_F64);
585   case WebAssembly::FP_TO_SINT_I64_F64:
586     return LowerFPToInt(MI, DL, BB, TII, false, true, true,
587                         WebAssembly::I64_TRUNC_S_F64);
588   case WebAssembly::FP_TO_UINT_I64_F64:
589     return LowerFPToInt(MI, DL, BB, TII, true, true, true,
590                         WebAssembly::I64_TRUNC_U_F64);
591   case WebAssembly::CALL_RESULTS:
592   case WebAssembly::RET_CALL_RESULTS:
593     return LowerCallResults(MI, DL, BB, Subtarget, TII);
594   }
595 }
596 
597 const char *
598 WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const {
599   switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) {
600   case WebAssemblyISD::FIRST_NUMBER:
601   case WebAssemblyISD::FIRST_MEM_OPCODE:
602     break;
603 #define HANDLE_NODETYPE(NODE)                                                  \
604   case WebAssemblyISD::NODE:                                                   \
605     return "WebAssemblyISD::" #NODE;
606 #define HANDLE_MEM_NODETYPE(NODE) HANDLE_NODETYPE(NODE)
607 #include "WebAssemblyISD.def"
608 #undef HANDLE_MEM_NODETYPE
609 #undef HANDLE_NODETYPE
610   }
611   return nullptr;
612 }
613 
614 std::pair<unsigned, const TargetRegisterClass *>
615 WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
616     const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
617   // First, see if this is a constraint that directly corresponds to a
618   // WebAssembly register class.
619   if (Constraint.size() == 1) {
620     switch (Constraint[0]) {
621     case 'r':
622       assert(VT != MVT::iPTR && "Pointer MVT not expected here");
623       if (Subtarget->hasSIMD128() && VT.isVector()) {
624         if (VT.getSizeInBits() == 128)
625           return std::make_pair(0U, &WebAssembly::V128RegClass);
626       }
627       if (VT.isInteger() && !VT.isVector()) {
628         if (VT.getSizeInBits() <= 32)
629           return std::make_pair(0U, &WebAssembly::I32RegClass);
630         if (VT.getSizeInBits() <= 64)
631           return std::make_pair(0U, &WebAssembly::I64RegClass);
632       }
633       if (VT.isFloatingPoint() && !VT.isVector()) {
634         switch (VT.getSizeInBits()) {
635         case 32:
636           return std::make_pair(0U, &WebAssembly::F32RegClass);
637         case 64:
638           return std::make_pair(0U, &WebAssembly::F64RegClass);
639         default:
640           break;
641         }
642       }
643       break;
644     default:
645       break;
646     }
647   }
648 
649   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
650 }
651 
652 bool WebAssemblyTargetLowering::isCheapToSpeculateCttz() const {
653   // Assume ctz is a relatively cheap operation.
654   return true;
655 }
656 
657 bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const {
658   // Assume clz is a relatively cheap operation.
659   return true;
660 }
661 
662 bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
663                                                       const AddrMode &AM,
664                                                       Type *Ty, unsigned AS,
665                                                       Instruction *I) const {
  // WebAssembly offsets are added as unsigned without wrapping. The
  // isLegalAddressingMode hook gives us no way to determine whether wrapping
  // could occur, so we approximate this by accepting only non-negative offsets.
669   if (AM.BaseOffs < 0)
670     return false;
671 
672   // WebAssembly has no scale register operands.
673   if (AM.Scale != 0)
674     return false;
675 
676   // Everything else is legal.
677   return true;
678 }
679 
680 bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
681     EVT /*VT*/, unsigned /*AddrSpace*/, Align /*Align*/,
682     MachineMemOperand::Flags /*Flags*/, bool *Fast) const {
  // WebAssembly supports unaligned accesses, though loads and stores that
  // perform them should be annotated with the p2align attribute, and there may
  // be a performance impact. We tell LLVM they're "fast" because
  // for the kinds of things that LLVM uses this for (merging adjacent stores
  // of constants, etc.), WebAssembly implementations will either want the
  // unaligned access or they'll split anyway.
689   if (Fast)
690     *Fast = true;
691   return true;
692 }
693 
694 bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
695                                               AttributeList Attr) const {
696   // The current thinking is that wasm engines will perform this optimization,
697   // so we can save on code size.
698   return true;
699 }
700 
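// Vector load extension is only desirable for the combinations that map onto
// Wasm SIMD's extending loads: v8i8 -> v8i16, v4i16 -> v4i32, and
// v2i32 -> v2i64.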
701 bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
702   EVT ExtT = ExtVal.getValueType();
703   EVT MemT = cast<LoadSDNode>(ExtVal->getOperand(0))->getValueType(0);
704   return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
705          (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
706          (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
707 }
708 
709 EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
710                                                   LLVMContext &C,
711                                                   EVT VT) const {
712   if (VT.isVector())
713     return VT.changeVectorElementTypeToInteger();
714 
715   // So far, all branch instructions in Wasm take an I32 condition.
716   // The default TargetLowering::getSetCCResultType returns the pointer size,
717   // which would be useful to reduce instruction counts when testing
718   // against 64-bit pointers/values if at some point Wasm supports that.
719   return EVT::getIntegerVT(C, 32);
720 }
721 
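// Describe the memory access performed by each target intrinsic so that
// SelectionDAG can attach an accurate MachineMemOperand (type, alignment, and
// load/store flags) to the node it builds.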
722 bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
723                                                    const CallInst &I,
724                                                    MachineFunction &MF,
725                                                    unsigned Intrinsic) const {
726   switch (Intrinsic) {
727   case Intrinsic::wasm_memory_atomic_notify:
728     Info.opc = ISD::INTRINSIC_W_CHAIN;
729     Info.memVT = MVT::i32;
730     Info.ptrVal = I.getArgOperand(0);
731     Info.offset = 0;
732     Info.align = Align(4);
    // The atomic.notify instruction does not really load the memory specified
    // with this argument, but MachineMemOperand must be either a load or a
    // store, so we set this to a load.
736     // FIXME Volatile isn't really correct, but currently all LLVM atomic
737     // instructions are treated as volatiles in the backend, so we should be
738     // consistent. The same applies for wasm_atomic_wait intrinsics too.
739     Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
740     return true;
741   case Intrinsic::wasm_memory_atomic_wait32:
742     Info.opc = ISD::INTRINSIC_W_CHAIN;
743     Info.memVT = MVT::i32;
744     Info.ptrVal = I.getArgOperand(0);
745     Info.offset = 0;
746     Info.align = Align(4);
747     Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
748     return true;
749   case Intrinsic::wasm_memory_atomic_wait64:
750     Info.opc = ISD::INTRINSIC_W_CHAIN;
751     Info.memVT = MVT::i64;
752     Info.ptrVal = I.getArgOperand(0);
753     Info.offset = 0;
754     Info.align = Align(8);
755     Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
756     return true;
757   case Intrinsic::wasm_load32_zero:
758   case Intrinsic::wasm_load64_zero:
759     Info.opc = ISD::INTRINSIC_W_CHAIN;
760     Info.memVT = Intrinsic == Intrinsic::wasm_load32_zero ? MVT::i32 : MVT::i64;
761     Info.ptrVal = I.getArgOperand(0);
762     Info.offset = 0;
763     Info.align = Align(1);
764     Info.flags = MachineMemOperand::MOLoad;
765     return true;
766   case Intrinsic::wasm_load8_lane:
767   case Intrinsic::wasm_load16_lane:
768   case Intrinsic::wasm_load32_lane:
769   case Intrinsic::wasm_load64_lane:
770   case Intrinsic::wasm_store8_lane:
771   case Intrinsic::wasm_store16_lane:
772   case Intrinsic::wasm_store32_lane:
773   case Intrinsic::wasm_store64_lane: {
774     MVT MemVT;
775     switch (Intrinsic) {
776     case Intrinsic::wasm_load8_lane:
777     case Intrinsic::wasm_store8_lane:
778       MemVT = MVT::i8;
779       break;
780     case Intrinsic::wasm_load16_lane:
781     case Intrinsic::wasm_store16_lane:
782       MemVT = MVT::i16;
783       break;
784     case Intrinsic::wasm_load32_lane:
785     case Intrinsic::wasm_store32_lane:
786       MemVT = MVT::i32;
787       break;
788     case Intrinsic::wasm_load64_lane:
789     case Intrinsic::wasm_store64_lane:
790       MemVT = MVT::i64;
791       break;
792     default:
793       llvm_unreachable("unexpected intrinsic");
794     }
795     if (Intrinsic == Intrinsic::wasm_load8_lane ||
796         Intrinsic == Intrinsic::wasm_load16_lane ||
797         Intrinsic == Intrinsic::wasm_load32_lane ||
798         Intrinsic == Intrinsic::wasm_load64_lane) {
799       Info.opc = ISD::INTRINSIC_W_CHAIN;
800       Info.flags = MachineMemOperand::MOLoad;
801     } else {
802       Info.opc = ISD::INTRINSIC_VOID;
803       Info.flags = MachineMemOperand::MOStore;
804     }
805     Info.ptrVal = I.getArgOperand(0);
806     Info.memVT = MemVT;
807     Info.offset = 0;
808     Info.align = Align(1);
809     return true;
810   }
811   default:
812     return false;
813   }
814 }
815 
816 //===----------------------------------------------------------------------===//
817 // WebAssembly Lowering private implementation.
818 //===----------------------------------------------------------------------===//
819 
820 //===----------------------------------------------------------------------===//
821 // Lowering Code
822 //===----------------------------------------------------------------------===//
823 
824 static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
825   MachineFunction &MF = DAG.getMachineFunction();
826   DAG.getContext()->diagnose(
827       DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
828 }
829 
830 // Test whether the given calling convention is supported.
831 static bool callingConvSupported(CallingConv::ID CallConv) {
  // We currently support the language-independent and target-independent
  // conventions. We don't yet have a way to annotate calls with properties like
  // "cold", and we don't have any call-clobbered registers, so these are mostly
  // all handled the same.
836   return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
837          CallConv == CallingConv::Cold ||
838          CallConv == CallingConv::PreserveMost ||
839          CallConv == CallingConv::PreserveAll ||
840          CallConv == CallingConv::CXX_FAST_TLS ||
841          CallConv == CallingConv::WASM_EmscriptenInvoke ||
842          CallConv == CallingConv::Swift;
843 }
844 
845 SDValue
846 WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
847                                      SmallVectorImpl<SDValue> &InVals) const {
848   SelectionDAG &DAG = CLI.DAG;
849   SDLoc DL = CLI.DL;
850   SDValue Chain = CLI.Chain;
851   SDValue Callee = CLI.Callee;
852   MachineFunction &MF = DAG.getMachineFunction();
853   auto Layout = MF.getDataLayout();
854 
855   CallingConv::ID CallConv = CLI.CallConv;
856   if (!callingConvSupported(CallConv))
857     fail(DL, DAG,
858          "WebAssembly doesn't support language-specific or target-specific "
859          "calling conventions yet");
860   if (CLI.IsPatchPoint)
861     fail(DL, DAG, "WebAssembly doesn't support patch point yet");
862 
863   if (CLI.IsTailCall) {
864     auto NoTail = [&](const char *Msg) {
865       if (CLI.CB && CLI.CB->isMustTailCall())
866         fail(DL, DAG, Msg);
867       CLI.IsTailCall = false;
868     };
869 
870     if (!Subtarget->hasTailCall())
871       NoTail("WebAssembly 'tail-call' feature not enabled");
872 
873     // Varargs calls cannot be tail calls because the buffer is on the stack
874     if (CLI.IsVarArg)
875       NoTail("WebAssembly does not support varargs tail calls");
876 
877     // Do not tail call unless caller and callee return types match
878     const Function &F = MF.getFunction();
879     const TargetMachine &TM = getTargetMachine();
880     Type *RetTy = F.getReturnType();
881     SmallVector<MVT, 4> CallerRetTys;
882     SmallVector<MVT, 4> CalleeRetTys;
883     computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
884     computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
885     bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
886                       std::equal(CallerRetTys.begin(), CallerRetTys.end(),
887                                  CalleeRetTys.begin());
888     if (!TypesMatch)
889       NoTail("WebAssembly tail call requires caller and callee return types to "
890              "match");
891 
892     // If pointers to local stack values are passed, we cannot tail call
893     if (CLI.CB) {
894       for (auto &Arg : CLI.CB->args()) {
895         Value *Val = Arg.get();
896         // Trace the value back through pointer operations
897         while (true) {
898           Value *Src = Val->stripPointerCastsAndAliases();
899           if (auto *GEP = dyn_cast<GetElementPtrInst>(Src))
900             Src = GEP->getPointerOperand();
901           if (Val == Src)
902             break;
903           Val = Src;
904         }
905         if (isa<AllocaInst>(Val)) {
906           NoTail(
907               "WebAssembly does not support tail calling with stack arguments");
908           break;
909         }
910       }
911     }
912   }
913 
914   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
915   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
916   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
917 
918   // The generic code may have added an sret argument. If we're lowering an
919   // invoke function, the ABI requires that the function pointer be the first
920   // argument, so we may have to swap the arguments.
921   if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
922       Outs[0].Flags.isSRet()) {
923     std::swap(Outs[0], Outs[1]);
924     std::swap(OutVals[0], OutVals[1]);
925   }
926 
927   bool HasSwiftSelfArg = false;
928   bool HasSwiftErrorArg = false;
929   unsigned NumFixedArgs = 0;
930   for (unsigned I = 0; I < Outs.size(); ++I) {
931     const ISD::OutputArg &Out = Outs[I];
932     SDValue &OutVal = OutVals[I];
933     HasSwiftSelfArg |= Out.Flags.isSwiftSelf();
934     HasSwiftErrorArg |= Out.Flags.isSwiftError();
935     if (Out.Flags.isNest())
936       fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
937     if (Out.Flags.isInAlloca())
938       fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
939     if (Out.Flags.isInConsecutiveRegs())
940       fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
941     if (Out.Flags.isInConsecutiveRegsLast())
942       fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
943     if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
944       auto &MFI = MF.getFrameInfo();
945       int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
946                                      Out.Flags.getNonZeroByValAlign(),
947                                      /*isSS=*/false);
948       SDValue SizeNode =
949           DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
950       SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
951       Chain = DAG.getMemcpy(
952           Chain, DL, FINode, OutVal, SizeNode, Out.Flags.getNonZeroByValAlign(),
953           /*isVolatile*/ false, /*AlwaysInline=*/false,
954           /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
955       OutVal = FINode;
956     }
957     // Count the number of fixed args *after* legalization.
958     NumFixedArgs += Out.IsFixed;
959   }
960 
961   bool IsVarArg = CLI.IsVarArg;
962   auto PtrVT = getPointerTy(Layout);
963 
  // For swiftcc, emit additional swiftself and swifterror arguments
  // if there aren't any. These additional arguments are also added to the
  // callee signature; they are necessary to match caller and callee signatures
  // for indirect calls.
968   if (CallConv == CallingConv::Swift) {
969     if (!HasSwiftSelfArg) {
970       NumFixedArgs++;
971       ISD::OutputArg Arg;
972       Arg.Flags.setSwiftSelf();
973       CLI.Outs.push_back(Arg);
974       SDValue ArgVal = DAG.getUNDEF(PtrVT);
975       CLI.OutVals.push_back(ArgVal);
976     }
977     if (!HasSwiftErrorArg) {
978       NumFixedArgs++;
979       ISD::OutputArg Arg;
980       Arg.Flags.setSwiftError();
981       CLI.Outs.push_back(Arg);
982       SDValue ArgVal = DAG.getUNDEF(PtrVT);
983       CLI.OutVals.push_back(ArgVal);
984     }
985   }
986 
987   // Analyze operands of the call, assigning locations to each operand.
988   SmallVector<CCValAssign, 16> ArgLocs;
989   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
990 
991   if (IsVarArg) {
992     // Outgoing non-fixed arguments are placed in a buffer. First
993     // compute their offsets and the total amount of buffer space needed.
994     for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
995       const ISD::OutputArg &Out = Outs[I];
996       SDValue &Arg = OutVals[I];
997       EVT VT = Arg.getValueType();
998       assert(VT != MVT::iPTR && "Legalized args should be concrete");
999       Type *Ty = VT.getTypeForEVT(*DAG.getContext());
1000       Align Alignment =
1001           std::max(Out.Flags.getNonZeroOrigAlign(), Layout.getABITypeAlign(Ty));
1002       unsigned Offset =
1003           CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), Alignment);
1004       CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
1005                                         Offset, VT.getSimpleVT(),
1006                                         CCValAssign::Full));
1007     }
1008   }
1009 
1010   unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
1011 
1012   SDValue FINode;
1013   if (IsVarArg && NumBytes) {
1014     // For non-fixed arguments, next emit stores to store the argument values
1015     // to the stack buffer at the offsets computed above.
1016     int FI = MF.getFrameInfo().CreateStackObject(NumBytes,
1017                                                  Layout.getStackAlignment(),
1018                                                  /*isSS=*/false);
1019     unsigned ValNo = 0;
1020     SmallVector<SDValue, 8> Chains;
1021     for (SDValue Arg : drop_begin(OutVals, NumFixedArgs)) {
1022       assert(ArgLocs[ValNo].getValNo() == ValNo &&
1023              "ArgLocs should remain in order and only hold varargs args");
1024       unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
1025       FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
1026       SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
1027                                 DAG.getConstant(Offset, DL, PtrVT));
1028       Chains.push_back(
1029           DAG.getStore(Chain, DL, Arg, Add,
1030                        MachinePointerInfo::getFixedStack(MF, FI, Offset)));
1031     }
1032     if (!Chains.empty())
1033       Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
1034   } else if (IsVarArg) {
1035     FINode = DAG.getIntPtrConstant(0, DL);
1036   }
1037 
1038   if (Callee->getOpcode() == ISD::GlobalAddress) {
    // If the callee is a GlobalAddress node (quite common, every direct call
    // is), turn it into a TargetGlobalAddress node so that LowerGlobalAddress
    // doesn't add MO_GOT, which is not needed for direct calls.
    GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Callee);
1043     Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
1044                                         getPointerTy(DAG.getDataLayout()),
1045                                         GA->getOffset());
1046     Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
1047                          getPointerTy(DAG.getDataLayout()), Callee);
1048   }
1049 
1050   // Compute the operands for the CALLn node.
1051   SmallVector<SDValue, 16> Ops;
1052   Ops.push_back(Chain);
1053   Ops.push_back(Callee);
1054 
1055   // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
1056   // isn't reliable.
1057   Ops.append(OutVals.begin(),
1058              IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
1059   // Add a pointer to the vararg buffer.
1060   if (IsVarArg)
1061     Ops.push_back(FINode);
1062 
1063   SmallVector<EVT, 8> InTys;
1064   for (const auto &In : Ins) {
1065     assert(!In.Flags.isByVal() && "byval is not valid for return values");
1066     assert(!In.Flags.isNest() && "nest is not valid for return values");
1067     if (In.Flags.isInAlloca())
1068       fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
1069     if (In.Flags.isInConsecutiveRegs())
1070       fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
1071     if (In.Flags.isInConsecutiveRegsLast())
1072       fail(DL, DAG,
1073            "WebAssembly hasn't implemented cons regs last return values");
1074     // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1075     // registers.
1076     InTys.push_back(In.VT);
1077   }
1078 
1079   if (CLI.IsTailCall) {
1080     // ret_calls do not return values to the current frame
1081     SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1082     return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
1083   }
1084 
1085   InTys.push_back(MVT::Other);
1086   SDVTList InTyList = DAG.getVTList(InTys);
1087   SDValue Res = DAG.getNode(WebAssemblyISD::CALL, DL, InTyList, Ops);
1088 
1089   for (size_t I = 0; I < Ins.size(); ++I)
1090     InVals.push_back(Res.getValue(I));
1091 
1092   // Return the chain
1093   return Res.getValue(Ins.size());
1094 }
1095 
1096 bool WebAssemblyTargetLowering::CanLowerReturn(
1097     CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
1098     const SmallVectorImpl<ISD::OutputArg> &Outs,
1099     LLVMContext & /*Context*/) const {
1100   // WebAssembly can only handle returning tuples with multivalue enabled
1101   return Subtarget->hasMultivalue() || Outs.size() <= 1;
1102 }
1103 
1104 SDValue WebAssemblyTargetLowering::LowerReturn(
1105     SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
1106     const SmallVectorImpl<ISD::OutputArg> &Outs,
1107     const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
1108     SelectionDAG &DAG) const {
1109   assert((Subtarget->hasMultivalue() || Outs.size() <= 1) &&
1110          "MVP WebAssembly can only return up to one value");
1111   if (!callingConvSupported(CallConv))
1112     fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1113 
1114   SmallVector<SDValue, 4> RetOps(1, Chain);
1115   RetOps.append(OutVals.begin(), OutVals.end());
1116   Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);
1117 
1118   // Record the number and types of the return values.
1119   for (const ISD::OutputArg &Out : Outs) {
1120     assert(!Out.Flags.isByVal() && "byval is not valid for return values");
1121     assert(!Out.Flags.isNest() && "nest is not valid for return values");
1122     assert(Out.IsFixed && "non-fixed return value is not valid");
1123     if (Out.Flags.isInAlloca())
1124       fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
1125     if (Out.Flags.isInConsecutiveRegs())
1126       fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
1127     if (Out.Flags.isInConsecutiveRegsLast())
1128       fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
1129   }
1130 
1131   return Chain;
1132 }
1133 
1134 SDValue WebAssemblyTargetLowering::LowerFormalArguments(
1135     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1136     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1137     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1138   if (!callingConvSupported(CallConv))
1139     fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1140 
1141   MachineFunction &MF = DAG.getMachineFunction();
1142   auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();
1143 
1144   // Set up the incoming ARGUMENTS value, which serves to represent the liveness
1145   // of the incoming values before they're represented by virtual registers.
1146   MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);
1147 
1148   bool HasSwiftErrorArg = false;
1149   bool HasSwiftSelfArg = false;
1150   for (const ISD::InputArg &In : Ins) {
1151     HasSwiftSelfArg |= In.Flags.isSwiftSelf();
1152     HasSwiftErrorArg |= In.Flags.isSwiftError();
1153     if (In.Flags.isInAlloca())
1154       fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
1155     if (In.Flags.isNest())
1156       fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
1157     if (In.Flags.isInConsecutiveRegs())
1158       fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
1159     if (In.Flags.isInConsecutiveRegsLast())
1160       fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
1161     // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1162     // registers.
1163     InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
1164                                            DAG.getTargetConstant(InVals.size(),
1165                                                                  DL, MVT::i32))
1166                              : DAG.getUNDEF(In.VT));
1167 
1168     // Record the number and types of arguments.
1169     MFI->addParam(In.VT);
1170   }
1171 
  // For swiftcc, emit additional swiftself and swifterror arguments
  // if there aren't any. These additional arguments are also added to the
  // callee signature; they are necessary to match caller and callee signatures
  // for indirect calls.
1176   auto PtrVT = getPointerTy(MF.getDataLayout());
1177   if (CallConv == CallingConv::Swift) {
1178     if (!HasSwiftSelfArg) {
1179       MFI->addParam(PtrVT);
1180     }
1181     if (!HasSwiftErrorArg) {
1182       MFI->addParam(PtrVT);
1183     }
1184   }
1185   // Varargs are copied into a buffer allocated by the caller, and a pointer to
1186   // the buffer is passed as an argument.
1187   if (IsVarArg) {
1188     MVT PtrVT = getPointerTy(MF.getDataLayout());
1189     Register VarargVreg =
1190         MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrVT));
1191     MFI->setVarargBufferVreg(VarargVreg);
1192     Chain = DAG.getCopyToReg(
1193         Chain, DL, VarargVreg,
1194         DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
1195                     DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
1196     MFI->addParam(PtrVT);
1197   }
1198 
1199   // Record the number and types of arguments and results.
1200   SmallVector<MVT, 4> Params;
1201   SmallVector<MVT, 4> Results;
1202   computeSignatureVTs(MF.getFunction().getFunctionType(), &MF.getFunction(),
1203                       MF.getFunction(), DAG.getTarget(), Params, Results);
1204   for (MVT VT : Results)
1205     MFI->addResult(VT);
  // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
  // the param logic here with computeSignatureVTs.
1208   assert(MFI->getParams().size() == Params.size() &&
1209          std::equal(MFI->getParams().begin(), MFI->getParams().end(),
1210                     Params.begin()));
1211 
1212   return Chain;
1213 }
1214 
1215 void WebAssemblyTargetLowering::ReplaceNodeResults(
1216     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
1217   switch (N->getOpcode()) {
1218   case ISD::SIGN_EXTEND_INREG:
1219     // Do not add any results, signifying that N should not be custom lowered
1220     // after all. This happens because simd128 turns on custom lowering for
1221     // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
1222     // illegal type.
1223     break;
1224   default:
1225     llvm_unreachable(
1226         "ReplaceNodeResults not implemented for this op for WebAssembly!");
1227   }
1228 }
1229 
1230 //===----------------------------------------------------------------------===//
1231 //  Custom lowering hooks.
1232 //===----------------------------------------------------------------------===//
1233 
1234 SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
1235                                                   SelectionDAG &DAG) const {
1236   SDLoc DL(Op);
1237   switch (Op.getOpcode()) {
1238   default:
1239     llvm_unreachable("unimplemented operation lowering");
1240     return SDValue();
1241   case ISD::FrameIndex:
1242     return LowerFrameIndex(Op, DAG);
1243   case ISD::GlobalAddress:
1244     return LowerGlobalAddress(Op, DAG);
1245   case ISD::GlobalTLSAddress:
1246     return LowerGlobalTLSAddress(Op, DAG);
1247   case ISD::ExternalSymbol:
1248     return LowerExternalSymbol(Op, DAG);
1249   case ISD::JumpTable:
1250     return LowerJumpTable(Op, DAG);
1251   case ISD::BR_JT:
1252     return LowerBR_JT(Op, DAG);
1253   case ISD::VASTART:
1254     return LowerVASTART(Op, DAG);
1255   case ISD::BlockAddress:
1256   case ISD::BRIND:
1257     fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
1258     return SDValue();
1259   case ISD::RETURNADDR:
1260     return LowerRETURNADDR(Op, DAG);
1261   case ISD::FRAMEADDR:
1262     return LowerFRAMEADDR(Op, DAG);
1263   case ISD::CopyToReg:
1264     return LowerCopyToReg(Op, DAG);
1265   case ISD::EXTRACT_VECTOR_ELT:
1266   case ISD::INSERT_VECTOR_ELT:
1267     return LowerAccessVectorElement(Op, DAG);
1268   case ISD::INTRINSIC_VOID:
1269   case ISD::INTRINSIC_WO_CHAIN:
1270   case ISD::INTRINSIC_W_CHAIN:
1271     return LowerIntrinsic(Op, DAG);
1272   case ISD::SIGN_EXTEND_INREG:
1273     return LowerSIGN_EXTEND_INREG(Op, DAG);
1274   case ISD::BUILD_VECTOR:
1275     return LowerBUILD_VECTOR(Op, DAG);
1276   case ISD::VECTOR_SHUFFLE:
1277     return LowerVECTOR_SHUFFLE(Op, DAG);
1278   case ISD::SETCC:
1279     return LowerSETCC(Op, DAG);
1280   case ISD::SHL:
1281   case ISD::SRA:
1282   case ISD::SRL:
1283     return LowerShift(Op, DAG);
1284   case ISD::FP_TO_SINT_SAT:
1285   case ISD::FP_TO_UINT_SAT:
1286     return LowerFP_TO_INT_SAT(Op, DAG);
1287   case ISD::LOAD:
1288     return LowerLoad(Op, DAG);
1289   case ISD::STORE:
1290     return LowerStore(Op, DAG);
1291   }
1292 }
1293 
1294 static bool IsWebAssemblyGlobal(SDValue Op) {
1295   if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
1296     return WebAssembly::isWasmVarAddressSpace(GA->getAddressSpace());
1297 
1298   return false;
1299 }
1300 
1301 static Optional<unsigned> IsWebAssemblyLocal(SDValue Op, SelectionDAG &DAG) {
1302   const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op);
1303   if (!FI)
1304     return None;
1305 
1306   auto &MF = DAG.getMachineFunction();
1307   return WebAssemblyFrameLowering::getLocalForStackObject(MF, FI->getIndex());
1308 }
1309 
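// Custom store lowering: a store whose base address is a WebAssembly global
// (a pointer in the wasm "variable" address space) becomes a GLOBAL_SET node,
// and a store to a stack object that has been assigned a wasm local becomes a
// LOCAL_SET node; roughly, such stores select to global.set / local.set
// instead of a linear-memory store. Any other store is returned unchanged and
// takes the ordinary store path.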
1310 SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op,
1311                                               SelectionDAG &DAG) const {
1312   SDLoc DL(Op);
1313   StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
1314   const SDValue &Value = SN->getValue();
1315   const SDValue &Base = SN->getBasePtr();
1316   const SDValue &Offset = SN->getOffset();
1317 
1318   if (IsWebAssemblyGlobal(Base)) {
1319     if (!Offset->isUndef())
1320       report_fatal_error("unexpected offset when storing to webassembly global",
1321                          false);
1322 
1323     SDVTList Tys = DAG.getVTList(MVT::Other);
1324     SDValue Ops[] = {SN->getChain(), Value, Base};
1325     return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_SET, DL, Tys, Ops,
1326                                    SN->getMemoryVT(), SN->getMemOperand());
1327   }
1328 
1329   if (Optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
1330     if (!Offset->isUndef())
1331       report_fatal_error("unexpected offset when storing to webassembly local",
1332                          false);
1333 
1334     SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
1335     SDVTList Tys = DAG.getVTList(MVT::Other); // The chain.
1336     SDValue Ops[] = {SN->getChain(), Idx, Value};
1337     return DAG.getNode(WebAssemblyISD::LOCAL_SET, DL, Tys, Ops);
1338   }
1339 
1340   return Op;
1341 }
1342 
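// Custom load lowering, mirroring LowerStore: loads from WebAssembly globals
// become GLOBAL_GET nodes and loads from stack objects assigned to wasm
// locals become LOCAL_GET nodes (merged with the incoming chain so the result
// still carries the load's two values). Any other load is left untouched.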
1343 SDValue WebAssemblyTargetLowering::LowerLoad(SDValue Op,
1344                                              SelectionDAG &DAG) const {
1345   SDLoc DL(Op);
1346   LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
1347   const SDValue &Base = LN->getBasePtr();
1348   const SDValue &Offset = LN->getOffset();
1349 
1350   if (IsWebAssemblyGlobal(Base)) {
1351     if (!Offset->isUndef())
1352       report_fatal_error(
1353           "unexpected offset when loading from webassembly global", false);
1354 
1355     SDVTList Tys = DAG.getVTList(LN->getValueType(0), MVT::Other);
1356     SDValue Ops[] = {LN->getChain(), Base};
1357     return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_GET, DL, Tys, Ops,
1358                                    LN->getMemoryVT(), LN->getMemOperand());
1359   }
1360 
1361   if (Optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
1362     if (!Offset->isUndef())
1363       report_fatal_error(
1364           "unexpected offset when loading from webassembly local", false);
1365 
1366     SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
1367     EVT LocalVT = LN->getValueType(0);
1368     SDValue LocalGet = DAG.getNode(WebAssemblyISD::LOCAL_GET, DL, LocalVT,
1369                                    {LN->getChain(), Idx});
1370     SDValue Result = DAG.getMergeValues({LocalGet, LN->getChain()}, DL);
1371     assert(Result->getNumValues() == 2 && "Loads must carry a chain!");
1372     return Result;
1373   }
1374 
1375   return Op;
1376 }
1377 
1378 SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
1379                                                   SelectionDAG &DAG) const {
1380   SDValue Src = Op.getOperand(2);
1381   if (isa<FrameIndexSDNode>(Src.getNode())) {
1382     // CopyToReg nodes don't support FrameIndex operands. Other targets select
1383     // the FI to some LEA-like instruction, but since we don't have that, we
1384     // need to insert some kind of instruction that can take an FI operand and
1385     // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
1386     // local.copy between Op and its FI operand.
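    // For example (illustrative), (CopyToReg ch, %r, FrameIndex:i32<0>)
    // becomes (CopyToReg ch, %r, (COPY_I32 FrameIndex:i32<0>)), so the frame
    // address is produced into a virtual register that CopyToReg can consume.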
1387     SDValue Chain = Op.getOperand(0);
1388     SDLoc DL(Op);
1389     unsigned Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
1390     EVT VT = Src.getValueType();
1391     SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
1392                                                    : WebAssembly::COPY_I64,
1393                                     DL, VT, Src),
1394                  0);
1395     return Op.getNode()->getNumValues() == 1
1396                ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
1397                : DAG.getCopyToReg(Chain, DL, Reg, Copy,
1398                                   Op.getNumOperands() == 4 ? Op.getOperand(3)
1399                                                            : SDValue());
1400   }
1401   return SDValue();
1402 }
1403 
1404 SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
1405                                                    SelectionDAG &DAG) const {
1406   int FI = cast<FrameIndexSDNode>(Op)->getIndex();
1407   return DAG.getTargetFrameIndex(FI, Op.getValueType());
1408 }
1409 
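// __builtin_return_address is only supported on Emscripten; there it is
// lowered to the RETURN_ADDRESS libcall, passing the requested depth as an
// i32 argument.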
1410 SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
1411                                                    SelectionDAG &DAG) const {
1412   SDLoc DL(Op);
1413 
1414   if (!Subtarget->getTargetTriple().isOSEmscripten()) {
1415     fail(DL, DAG,
1416          "Non-Emscripten WebAssembly hasn't implemented "
1417          "__builtin_return_address");
1418     return SDValue();
1419   }
1420 
1421   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
1422     return SDValue();
1423 
1424   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1425   MakeLibCallOptions CallOptions;
1426   return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
1427                      {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
1428       .first;
1429 }
1430 
1431 SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
1432                                                   SelectionDAG &DAG) const {
1433   // Non-zero depths are not supported by WebAssembly currently. Use the
1434   // legalizer's default expansion, which is to return 0 (what this function is
1435   // documented to do).
1436   if (Op.getConstantOperandVal(0) > 0)
1437     return SDValue();
1438 
1439   DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
1440   EVT VT = Op.getValueType();
1441   Register FP =
1442       Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
1443   return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
1444 }
1445 
1446 SDValue
1447 WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
1448                                                  SelectionDAG &DAG) const {
1449   SDLoc DL(Op);
1450   const auto *GA = cast<GlobalAddressSDNode>(Op);
1451   MVT PtrVT = getPointerTy(DAG.getDataLayout());
1452 
1453   MachineFunction &MF = DAG.getMachineFunction();
1454   if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
1455     report_fatal_error("cannot use thread-local storage without bulk memory",
1456                        false);
1457 
1458   const GlobalValue *GV = GA->getGlobal();
1459 
1460   // Currently Emscripten does not support dynamic linking with threads.
1461   // Therefore, if we have thread-local storage, only the local-exec model
1462   // is possible.
1463   // TODO: remove this and implement proper TLS models once Emscripten
1464   // supports dynamic linking with threads.
1465   if (GV->getThreadLocalMode() != GlobalValue::LocalExecTLSModel &&
1466       !Subtarget->getTargetTriple().isOSEmscripten()) {
1467     report_fatal_error("only -ftls-model=local-exec is supported for now on "
1468                        "non-Emscripten OSes: variable " +
1469                            GV->getName(),
1470                        false);
1471   }
1472 
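  // The local-exec address is __tls_base plus the symbol's offset from the
  // start of the TLS block, i.e. roughly:
  //   global.get __tls_base
  //   i32.const  <offset of gv within the TLS block>  ;; i64 flavor on wasm64
  //   i32.add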
1473   auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
1474                                      : WebAssembly::GLOBAL_GET_I32;
1475   const char *BaseName = MF.createExternalSymbolName("__tls_base");
1476 
1477   SDValue BaseAddr(
1478       DAG.getMachineNode(GlobalGet, DL, PtrVT,
1479                          DAG.getTargetExternalSymbol(BaseName, PtrVT)),
1480       0);
1481 
1482   SDValue TLSOffset = DAG.getTargetGlobalAddress(
1483       GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL);
1484   SDValue SymAddr = DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, TLSOffset);
1485 
1486   return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymAddr);
1487 }
1488 
1489 SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
1490                                                       SelectionDAG &DAG) const {
1491   SDLoc DL(Op);
1492   const auto *GA = cast<GlobalAddressSDNode>(Op);
1493   EVT VT = Op.getValueType();
1494   assert(GA->getTargetFlags() == 0 &&
1495          "Unexpected target flags on generic GlobalAddressSDNode");
1496   if (!WebAssembly::isValidAddressSpace(GA->getAddressSpace()))
1497     fail(DL, DAG, "Invalid address space for WebAssembly target");
1498 
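  // Under PIC, a DSO-local global is addressed as a base global plus a
  // base-relative constant: roughly "global.get __memory_base; i32.const
  // <offset of g>; i32.add" (with __table_base instead for function
  // addresses). Globals that may be defined elsewhere are instead loaded
  // through a GOT import via the MO_GOT wrapper. Non-PIC code simply wraps
  // the target global address directly.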
1499   unsigned OperandFlags = 0;
1500   if (isPositionIndependent()) {
1501     const GlobalValue *GV = GA->getGlobal();
1502     if (getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) {
1503       MachineFunction &MF = DAG.getMachineFunction();
1504       MVT PtrVT = getPointerTy(MF.getDataLayout());
1505       const char *BaseName;
1506       if (GV->getValueType()->isFunctionTy()) {
1507         BaseName = MF.createExternalSymbolName("__table_base");
1508         OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL;
      } else {
1511         BaseName = MF.createExternalSymbolName("__memory_base");
1512         OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL;
1513       }
1514       SDValue BaseAddr =
1515           DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
1516                       DAG.getTargetExternalSymbol(BaseName, PtrVT));
1517 
1518       SDValue SymAddr = DAG.getNode(
1519           WebAssemblyISD::WrapperPIC, DL, VT,
1520           DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
1521                                      OperandFlags));
1522 
1523       return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
1524     } else {
1525       OperandFlags = WebAssemblyII::MO_GOT;
1526     }
1527   }
1528 
1529   return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
1530                      DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
1531                                                 GA->getOffset(), OperandFlags));
1532 }
1533 
1534 SDValue
1535 WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
1536                                                SelectionDAG &DAG) const {
1537   SDLoc DL(Op);
1538   const auto *ES = cast<ExternalSymbolSDNode>(Op);
1539   EVT VT = Op.getValueType();
1540   assert(ES->getTargetFlags() == 0 &&
1541          "Unexpected target flags on generic ExternalSymbolSDNode");
1542   return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
1543                      DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
1544 }
1545 
1546 SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
1547                                                   SelectionDAG &DAG) const {
1548   // There's no need for a Wrapper node because we always incorporate a jump
1549   // table operand into a BR_TABLE instruction, rather than ever
1550   // materializing it in a register.
1551   const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
1552   return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
1553                                 JT->getTargetFlags());
1554 }
1555 
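// Lower br_jt to a single BR_TABLE node whose operands are the chain, the
// index, one basic block per jump-table entry, and a trailing default target.
// For example (illustrative), a 3-entry table produces
//   BR_TABLE ch, idx, bb0, bb1, bb2, bb0
// where the duplicated first block is a placeholder default that
// WebAssemblyFixBrTableDefaults later replaces with the real one.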
1556 SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
1557                                               SelectionDAG &DAG) const {
1558   SDLoc DL(Op);
1559   SDValue Chain = Op.getOperand(0);
1560   const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
1561   SDValue Index = Op.getOperand(2);
1562   assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
1563 
1564   SmallVector<SDValue, 8> Ops;
1565   Ops.push_back(Chain);
1566   Ops.push_back(Index);
1567 
1568   MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
1569   const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
1570 
1571   // Add an operand for each case.
1572   for (auto MBB : MBBs)
1573     Ops.push_back(DAG.getBasicBlock(MBB));
1574 
1575   // Add the first MBB as a dummy default target for now. This will be replaced
1576   // with the proper default target (and the preceding range check eliminated)
1577   // if possible by WebAssemblyFixBrTableDefaults.
1578   Ops.push_back(DAG.getBasicBlock(*MBBs.begin()));
1579   return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
1580 }
1581 
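// va_start simply stores the address of the vararg buffer (recorded in
// WebAssemblyFunctionInfo when the formal arguments were lowered) into the
// va_list object the intrinsic points at.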
1582 SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
1583                                                 SelectionDAG &DAG) const {
1584   SDLoc DL(Op);
1585   EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());
1586 
1587   auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
1588   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1589 
1590   SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
1591                                     MFI->getVarargBufferVreg(), PtrVT);
1592   return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
1593                       MachinePointerInfo(SV));
1594 }
1595 
1596 static SDValue getCppExceptionSymNode(SDValue Op, unsigned TagIndex,
1597                                       SelectionDAG &DAG) {
1598   // We only support C++ exceptions for now
1599   int Tag =
1600       cast<ConstantSDNode>(Op.getOperand(TagIndex).getNode())->getZExtValue();
1601   if (Tag != WebAssembly::CPP_EXCEPTION)
1602     llvm_unreachable("Invalid tag: We only support C++ exceptions for now");
1603   auto &MF = DAG.getMachineFunction();
1604   const auto &TLI = DAG.getTargetLoweringInfo();
1605   MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
1606   const char *SymName = MF.createExternalSymbolName("__cpp_exception");
1607   return DAG.getNode(WebAssemblyISD::Wrapper, SDLoc(Op), PtrVT,
1608                      DAG.getTargetExternalSymbol(SymName, PtrVT));
1609 }
1610 
1611 SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
1612                                                   SelectionDAG &DAG) const {
1613   MachineFunction &MF = DAG.getMachineFunction();
1614   unsigned IntNo;
1615   switch (Op.getOpcode()) {
1616   case ISD::INTRINSIC_VOID:
1617   case ISD::INTRINSIC_W_CHAIN:
1618     IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1619     break;
1620   case ISD::INTRINSIC_WO_CHAIN:
1621     IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1622     break;
1623   default:
1624     llvm_unreachable("Invalid intrinsic");
1625   }
1626   SDLoc DL(Op);
1627 
1628   switch (IntNo) {
1629   default:
1630     return SDValue(); // Don't custom lower most intrinsics.
1631 
1632   case Intrinsic::wasm_lsda: {
1633     EVT VT = Op.getValueType();
1634     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1635     MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
1636     auto &Context = MF.getMMI().getContext();
1637     MCSymbol *S = Context.getOrCreateSymbol(Twine("GCC_except_table") +
1638                                             Twine(MF.getFunctionNumber()));
1639     return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
1640                        DAG.getMCSymbol(S, PtrVT));
1641   }
1642 
1643   case Intrinsic::wasm_throw: {
1644     SDValue SymNode = getCppExceptionSymNode(Op, 2, DAG);
1645     return DAG.getNode(WebAssemblyISD::THROW, DL,
1646                        MVT::Other, // outchain type
1647                        {
1648                            Op.getOperand(0), // inchain
1649                            SymNode,          // exception symbol
1650                            Op.getOperand(3)  // thrown value
1651                        });
1652   }
1653 
1654   case Intrinsic::wasm_catch: {
1655     SDValue SymNode = getCppExceptionSymNode(Op, 2, DAG);
1656     return DAG.getNode(WebAssemblyISD::CATCH, DL,
1657                        {
                           MVT::i32,  // extracted value
                           MVT::Other // outchain type
1660                        },
1661                        {
1662                            Op.getOperand(0), // inchain
1663                            SymNode           // exception symbol
1664                        });
1665   }
1666 
1667   case Intrinsic::wasm_shuffle: {
1668     // Drop in-chain and replace undefs, but otherwise pass through unchanged
1669     SDValue Ops[18];
1670     size_t OpIdx = 0;
1671     Ops[OpIdx++] = Op.getOperand(1);
1672     Ops[OpIdx++] = Op.getOperand(2);
1673     while (OpIdx < 18) {
1674       const SDValue &MaskIdx = Op.getOperand(OpIdx + 1);
1675       if (MaskIdx.isUndef() ||
1676           cast<ConstantSDNode>(MaskIdx.getNode())->getZExtValue() >= 32) {
1677         Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32);
1678       } else {
1679         Ops[OpIdx++] = MaskIdx;
1680       }
1681     }
1682     return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
1683   }
1684   }
1685 }
1686 
1687 SDValue
1688 WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
1689                                                   SelectionDAG &DAG) const {
1690   SDLoc DL(Op);
1691   // If sign extension operations are disabled, allow sext_inreg only if operand
1692   // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign
1693   // extension operations, but allowing sext_inreg in this context lets us have
1694   // simple patterns to select extract_lane_s instructions. Expanding sext_inreg
1695   // everywhere would be simpler in this file, but would necessitate large and
1696   // brittle patterns to undo the expansion and select extract_lane_s
1697   // instructions.
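  // For example (illustrative), with a v4i32 source vector:
  //   (sext_inreg (extract_vector_elt (v4i32 $v), 1), i8)
  // is rewritten below into an extract from a v16i8 bitcast of $v at lane 4
  // (the low byte of the original lane), which then matches the
  // i8x16.extract_lane_s patterns.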
1698   assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
1699   if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
1700     return SDValue();
1701 
1702   const SDValue &Extract = Op.getOperand(0);
1703   MVT VecT = Extract.getOperand(0).getSimpleValueType();
1704   if (VecT.getVectorElementType().getSizeInBits() > 32)
1705     return SDValue();
1706   MVT ExtractedLaneT =
1707       cast<VTSDNode>(Op.getOperand(1).getNode())->getVT().getSimpleVT();
1708   MVT ExtractedVecT =
1709       MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
1710   if (ExtractedVecT == VecT)
1711     return Op;
1712 
1713   // Bitcast vector to appropriate type to ensure ISel pattern coverage
1714   const SDNode *Index = Extract.getOperand(1).getNode();
1715   if (!isa<ConstantSDNode>(Index))
1716     return SDValue();
1717   unsigned IndexVal = cast<ConstantSDNode>(Index)->getZExtValue();
1718   unsigned Scale =
1719       ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
1720   assert(Scale > 1);
1721   SDValue NewIndex =
1722       DAG.getConstant(IndexVal * Scale, DL, Index->getValueType(0));
1723   SDValue NewExtract = DAG.getNode(
1724       ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(),
1725       DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
1726   return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract,
1727                      Op.getOperand(1));
1728 }
1729 
1730 SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
1731                                                      SelectionDAG &DAG) const {
1732   SDLoc DL(Op);
1733   const EVT VecT = Op.getValueType();
1734   const EVT LaneT = Op.getOperand(0).getValueType();
1735   const size_t Lanes = Op.getNumOperands();
1736   bool CanSwizzle = VecT == MVT::v16i8;
1737 
1738   // BUILD_VECTORs are lowered to the instruction that initializes the highest
1739   // possible number of lanes at once followed by a sequence of replace_lane
1740   // instructions to individually initialize any remaining lanes.
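  // For example (illustrative), building a v4i32 from {1, 2, 3, %x} treats
  // the three constant lanes as the most common kind, materializes {1, 2, 3,
  // 0} as a single vector constant, and then emits one replace_lane
  // (insert_vector_elt) for the remaining non-constant lane %x.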
1741 
1742   // TODO: Tune this. For example, lanewise swizzling is very expensive, so
1743   // swizzled lanes should be given greater weight.
1744 
1745   // TODO: Investigate looping rather than always extracting/replacing specific
1746   // lanes to fill gaps.
1747 
1748   auto IsConstant = [](const SDValue &V) {
1749     return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
1750   };
1751 
1752   // Returns the source vector and index vector pair if they exist. Checks for:
1753   //   (extract_vector_elt
1754   //     $src,
1755   //     (sign_extend_inreg (extract_vector_elt $indices, $i))
1756   //   )
1757   auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
1758     auto Bail = std::make_pair(SDValue(), SDValue());
1759     if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
1760       return Bail;
1761     const SDValue &SwizzleSrc = Lane->getOperand(0);
1762     const SDValue &IndexExt = Lane->getOperand(1);
1763     if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
1764       return Bail;
1765     const SDValue &Index = IndexExt->getOperand(0);
1766     if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
1767       return Bail;
1768     const SDValue &SwizzleIndices = Index->getOperand(0);
1769     if (SwizzleSrc.getValueType() != MVT::v16i8 ||
1770         SwizzleIndices.getValueType() != MVT::v16i8 ||
1771         Index->getOperand(1)->getOpcode() != ISD::Constant ||
1772         Index->getConstantOperandVal(1) != I)
1773       return Bail;
1774     return std::make_pair(SwizzleSrc, SwizzleIndices);
1775   };
1776 
1777   // If the lane is extracted from another vector at a constant index, return
1778   // that vector. The source vector must not have more lanes than the dest
1779   // because the shufflevector indices are in terms of the destination lanes and
1780   // would not be able to address the smaller individual source lanes.
1781   auto GetShuffleSrc = [&](const SDValue &Lane) {
1782     if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
1783       return SDValue();
1784     if (!isa<ConstantSDNode>(Lane->getOperand(1).getNode()))
1785       return SDValue();
1786     if (Lane->getOperand(0).getValueType().getVectorNumElements() >
1787         VecT.getVectorNumElements())
1788       return SDValue();
1789     return Lane->getOperand(0);
1790   };
1791 
1792   using ValueEntry = std::pair<SDValue, size_t>;
1793   SmallVector<ValueEntry, 16> SplatValueCounts;
1794 
1795   using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
1796   SmallVector<SwizzleEntry, 16> SwizzleCounts;
1797 
1798   using ShuffleEntry = std::pair<SDValue, size_t>;
1799   SmallVector<ShuffleEntry, 16> ShuffleCounts;
1800 
1801   auto AddCount = [](auto &Counts, const auto &Val) {
1802     auto CountIt =
1803         llvm::find_if(Counts, [&Val](auto E) { return E.first == Val; });
1804     if (CountIt == Counts.end()) {
1805       Counts.emplace_back(Val, 1);
1806     } else {
1807       CountIt->second++;
1808     }
1809   };
1810 
1811   auto GetMostCommon = [](auto &Counts) {
1812     auto CommonIt =
1813         std::max_element(Counts.begin(), Counts.end(),
1814                          [](auto A, auto B) { return A.second < B.second; });
1815     assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
1816     return *CommonIt;
1817   };
1818 
1819   size_t NumConstantLanes = 0;
1820 
1821   // Count eligible lanes for each type of vector creation op
1822   for (size_t I = 0; I < Lanes; ++I) {
1823     const SDValue &Lane = Op->getOperand(I);
1824     if (Lane.isUndef())
1825       continue;
1826 
1827     AddCount(SplatValueCounts, Lane);
1828 
1829     if (IsConstant(Lane))
1830       NumConstantLanes++;
1831     if (auto ShuffleSrc = GetShuffleSrc(Lane))
1832       AddCount(ShuffleCounts, ShuffleSrc);
1833     if (CanSwizzle) {
1834       auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
1835       if (SwizzleSrcs.first)
1836         AddCount(SwizzleCounts, SwizzleSrcs);
1837     }
1838   }
1839 
1840   SDValue SplatValue;
1841   size_t NumSplatLanes;
1842   std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);
1843 
1844   SDValue SwizzleSrc;
1845   SDValue SwizzleIndices;
1846   size_t NumSwizzleLanes = 0;
1847   if (SwizzleCounts.size())
1848     std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
1849                           NumSwizzleLanes) = GetMostCommon(SwizzleCounts);
1850 
1851   // Shuffles can draw from up to two vectors, so find the two most common
1852   // sources.
1853   SDValue ShuffleSrc1, ShuffleSrc2;
1854   size_t NumShuffleLanes = 0;
1855   if (ShuffleCounts.size()) {
1856     std::tie(ShuffleSrc1, NumShuffleLanes) = GetMostCommon(ShuffleCounts);
1857     ShuffleCounts.erase(std::remove_if(ShuffleCounts.begin(),
1858                                        ShuffleCounts.end(),
1859                                        [&](const auto &Pair) {
1860                                          return Pair.first == ShuffleSrc1;
1861                                        }),
1862                         ShuffleCounts.end());
1863   }
1864   if (ShuffleCounts.size()) {
1865     size_t AdditionalShuffleLanes;
1866     std::tie(ShuffleSrc2, AdditionalShuffleLanes) =
1867         GetMostCommon(ShuffleCounts);
1868     NumShuffleLanes += AdditionalShuffleLanes;
1869   }
1870 
  // Predicate returning true if the lane is already properly initialized by
  // the vector-creation instruction chosen below (Result), so that no
  // replace_lane is needed for it.
1873   std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
1874   SDValue Result;
1875   // Prefer swizzles over shuffles over vector consts over splats
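  // (ties go to the earlier option in this list because the comparisons below
  // use >=)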
1876   if (NumSwizzleLanes >= NumShuffleLanes &&
1877       NumSwizzleLanes >= NumConstantLanes && NumSwizzleLanes >= NumSplatLanes) {
1878     Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
1879                          SwizzleIndices);
1880     auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
1881     IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
1882       return Swizzled == GetSwizzleSrcs(I, Lane);
1883     };
1884   } else if (NumShuffleLanes >= NumConstantLanes &&
1885              NumShuffleLanes >= NumSplatLanes) {
1886     size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits() / 8;
1887     size_t DestLaneCount = VecT.getVectorNumElements();
1888     size_t Scale1 = 1;
1889     size_t Scale2 = 1;
1890     SDValue Src1 = ShuffleSrc1;
1891     SDValue Src2 = ShuffleSrc2 ? ShuffleSrc2 : DAG.getUNDEF(VecT);
1892     if (Src1.getValueType() != VecT) {
1893       size_t LaneSize =
1894           Src1.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
1895       assert(LaneSize > DestLaneSize);
1896       Scale1 = LaneSize / DestLaneSize;
1897       Src1 = DAG.getBitcast(VecT, Src1);
1898     }
1899     if (Src2.getValueType() != VecT) {
1900       size_t LaneSize =
1901           Src2.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
1902       assert(LaneSize > DestLaneSize);
1903       Scale2 = LaneSize / DestLaneSize;
1904       Src2 = DAG.getBitcast(VecT, Src2);
1905     }
1906 
1907     int Mask[16];
1908     assert(DestLaneCount <= 16);
1909     for (size_t I = 0; I < DestLaneCount; ++I) {
1910       const SDValue &Lane = Op->getOperand(I);
1911       SDValue Src = GetShuffleSrc(Lane);
1912       if (Src == ShuffleSrc1) {
1913         Mask[I] = Lane->getConstantOperandVal(1) * Scale1;
1914       } else if (Src && Src == ShuffleSrc2) {
1915         Mask[I] = DestLaneCount + Lane->getConstantOperandVal(1) * Scale2;
1916       } else {
1917         Mask[I] = -1;
1918       }
1919     }
1920     ArrayRef<int> MaskRef(Mask, DestLaneCount);
1921     Result = DAG.getVectorShuffle(VecT, DL, Src1, Src2, MaskRef);
1922     IsLaneConstructed = [&](size_t, const SDValue &Lane) {
1923       auto Src = GetShuffleSrc(Lane);
1924       return Src == ShuffleSrc1 || (Src && Src == ShuffleSrc2);
1925     };
1926   } else if (NumConstantLanes >= NumSplatLanes) {
1927     SmallVector<SDValue, 16> ConstLanes;
1928     for (const SDValue &Lane : Op->op_values()) {
1929       if (IsConstant(Lane)) {
1930         ConstLanes.push_back(Lane);
1931       } else if (LaneT.isFloatingPoint()) {
1932         ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
1933       } else {
1934         ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
1935       }
1936     }
1937     Result = DAG.getBuildVector(VecT, DL, ConstLanes);
1938     IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
1939       return IsConstant(Lane);
1940     };
1941   } else {
1942     // Use a splat, but possibly a load_splat
1943     LoadSDNode *SplattedLoad;
1944     if ((SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) &&
1945         SplattedLoad->getMemoryVT() == VecT.getVectorElementType()) {
1946       Result = DAG.getMemIntrinsicNode(
1947           WebAssemblyISD::LOAD_SPLAT, DL, DAG.getVTList(VecT),
1948           {SplattedLoad->getChain(), SplattedLoad->getBasePtr(),
1949            SplattedLoad->getOffset()},
1950           SplattedLoad->getMemoryVT(), SplattedLoad->getMemOperand());
1951     } else {
1952       Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
1953     }
1954     IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
1955       return Lane == SplatValue;
1956     };
1957   }
1958 
1959   assert(Result);
1960   assert(IsLaneConstructed);
1961 
1962   // Add replace_lane instructions for any unhandled values
1963   for (size_t I = 0; I < Lanes; ++I) {
1964     const SDValue &Lane = Op->getOperand(I);
1965     if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
1966       Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
1967                            DAG.getConstant(I, DL, MVT::i32));
1968   }
1969 
1970   return Result;
1971 }
1972 
1973 SDValue
1974 WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
1975                                                SelectionDAG &DAG) const {
1976   SDLoc DL(Op);
1977   ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
1978   MVT VecType = Op.getOperand(0).getSimpleValueType();
1979   assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
1980   size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;
1981 
1982   // Space for two vector args and sixteen mask indices
1983   SDValue Ops[18];
1984   size_t OpIdx = 0;
1985   Ops[OpIdx++] = Op.getOperand(0);
1986   Ops[OpIdx++] = Op.getOperand(1);
1987 
1988   // Expand mask indices to byte indices and materialize them as operands
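  // For example (illustrative), a v4i32 shuffle index M expands to the four
  // byte indices M*4 .. M*4+3, so a mask element of 1 contributes bytes
  // 4, 5, 6, 7 and an undef element contributes 0, 0, 0, 0.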
1989   for (int M : Mask) {
1990     for (size_t J = 0; J < LaneBytes; ++J) {
1991       // Lower undefs (represented by -1 in mask) to zero
1992       uint64_t ByteIndex = M == -1 ? 0 : (uint64_t)M * LaneBytes + J;
1993       Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
1994     }
1995   }
1996 
1997   return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
1998 }
1999 
2000 SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
2001                                               SelectionDAG &DAG) const {
2002   SDLoc DL(Op);
2003   // The legalizer does not know how to expand the unsupported comparison modes
2004   // of i64x2 vectors, so we manually unroll them here.
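  // For example (illustrative), (setcc (v2i64 $a), (v2i64 $b), setult) becomes
  // two scalar i64 select_cc nodes, each producing all-ones or zero, whose
  // results are combined back into a vector with a build_vector.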
2005   assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
2006   SmallVector<SDValue, 2> LHS, RHS;
2007   DAG.ExtractVectorElements(Op->getOperand(0), LHS);
2008   DAG.ExtractVectorElements(Op->getOperand(1), RHS);
2009   const SDValue &CC = Op->getOperand(2);
2010   auto MakeLane = [&](unsigned I) {
2011     return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I],
2012                        DAG.getConstant(uint64_t(-1), DL, MVT::i64),
2013                        DAG.getConstant(uint64_t(0), DL, MVT::i64), CC);
2014   };
2015   return DAG.getBuildVector(Op->getValueType(0), DL,
2016                             {MakeLane(0), MakeLane(1)});
2017 }
2018 
2019 SDValue
2020 WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
2021                                                     SelectionDAG &DAG) const {
2022   // Allow constant lane indices, expand variable lane indices
2023   SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
2024   if (isa<ConstantSDNode>(IdxNode) || IdxNode->isUndef())
2025     return Op;
2026   else
2027     // Perform default expansion
2028     return SDValue();
2029 }
2030 
2031 static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
2032   EVT LaneT = Op.getSimpleValueType().getVectorElementType();
2033   // 32-bit and 64-bit unrolled shifts will have proper semantics
2034   if (LaneT.bitsGE(MVT::i32))
2035     return DAG.UnrollVectorOp(Op.getNode());
2036   // Otherwise mask the shift value to get proper semantics from 32-bit shift
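  // e.g. for i16 lanes the shift amount is ANDed with 15, and for SRA the
  // shifted value is first sign-extended from i16 so that the 32-bit
  // arithmetic shift produces what an i16 shift would have.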
2037   SDLoc DL(Op);
2038   size_t NumLanes = Op.getSimpleValueType().getVectorNumElements();
2039   SDValue Mask = DAG.getConstant(LaneT.getSizeInBits() - 1, DL, MVT::i32);
2040   unsigned ShiftOpcode = Op.getOpcode();
2041   SmallVector<SDValue, 16> ShiftedElements;
2042   DAG.ExtractVectorElements(Op.getOperand(0), ShiftedElements, 0, 0, MVT::i32);
2043   SmallVector<SDValue, 16> ShiftElements;
2044   DAG.ExtractVectorElements(Op.getOperand(1), ShiftElements, 0, 0, MVT::i32);
2045   SmallVector<SDValue, 16> UnrolledOps;
2046   for (size_t i = 0; i < NumLanes; ++i) {
2047     SDValue MaskedShiftValue =
2048         DAG.getNode(ISD::AND, DL, MVT::i32, ShiftElements[i], Mask);
2049     SDValue ShiftedValue = ShiftedElements[i];
2050     if (ShiftOpcode == ISD::SRA)
2051       ShiftedValue = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32,
2052                                  ShiftedValue, DAG.getValueType(LaneT));
2053     UnrolledOps.push_back(
2054         DAG.getNode(ShiftOpcode, DL, MVT::i32, ShiftedValue, MaskedShiftValue));
2055   }
2056   return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps);
2057 }
2058 
2059 SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
2060                                               SelectionDAG &DAG) const {
2061   SDLoc DL(Op);
2062 
2063   // Only manually lower vector shifts
2064   assert(Op.getSimpleValueType().isVector());
2065 
2066   auto ShiftVal = DAG.getSplatValue(Op.getOperand(1));
2067   if (!ShiftVal)
2068     return unrollVectorShift(Op, DAG);
2069 
2070   // Use anyext because none of the high bits can affect the shift
2071   ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32);
2072 
2073   unsigned Opcode;
2074   switch (Op.getOpcode()) {
2075   case ISD::SHL:
2076     Opcode = WebAssemblyISD::VEC_SHL;
2077     break;
2078   case ISD::SRA:
2079     Opcode = WebAssemblyISD::VEC_SHR_S;
2080     break;
2081   case ISD::SRL:
2082     Opcode = WebAssemblyISD::VEC_SHR_U;
2083     break;
2084   default:
2085     llvm_unreachable("unexpected opcode");
2086   }
2087 
2088   return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal);
2089 }
2090 
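// Saturating float-to-int conversions that map onto wasm's trunc_sat
// instructions (scalar i32/i64 results saturating at i32 or i64 width, and
// v4i32 results saturating at i32) are returned unchanged so the normal
// patterns can select them; anything else falls back to the generic
// expansion.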
2091 SDValue WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
2092                                                       SelectionDAG &DAG) const {
2093   SDLoc DL(Op);
2094   EVT ResT = Op.getValueType();
2095   EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2096 
2097   if ((ResT == MVT::i32 || ResT == MVT::i64) &&
2098       (SatVT == MVT::i32 || SatVT == MVT::i64))
2099     return Op;
2100 
2101   if (ResT == MVT::v4i32 && SatVT == MVT::i32)
2102     return Op;
2103 
2104   return SDValue();
2105 }
2106 
2107 //===----------------------------------------------------------------------===//
2108 //   Custom DAG combine hooks
2109 //===----------------------------------------------------------------------===//
2110 static SDValue
2111 performVECTOR_SHUFFLECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
2112   auto &DAG = DCI.DAG;
2113   auto Shuffle = cast<ShuffleVectorSDNode>(N);
2114 
2115   // Hoist vector bitcasts that don't change the number of lanes out of unary
2116   // shuffles, where they are less likely to get in the way of other combines.
2117   // (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) ->
2118   //  (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask))))
2119   SDValue Bitcast = N->getOperand(0);
2120   if (Bitcast.getOpcode() != ISD::BITCAST)
2121     return SDValue();
2122   if (!N->getOperand(1).isUndef())
2123     return SDValue();
2124   SDValue CastOp = Bitcast.getOperand(0);
2125   MVT SrcType = CastOp.getSimpleValueType();
2126   MVT DstType = Bitcast.getSimpleValueType();
2127   if (!SrcType.is128BitVector() ||
2128       SrcType.getVectorNumElements() != DstType.getVectorNumElements())
2129     return SDValue();
2130   SDValue NewShuffle = DAG.getVectorShuffle(
2131       SrcType, SDLoc(N), CastOp, DAG.getUNDEF(SrcType), Shuffle->getMask());
2132   return DAG.getBitcast(DstType, NewShuffle);
2133 }
2134 
2135 static SDValue
2136 performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
2137   auto &DAG = DCI.DAG;
2138   assert(N->getOpcode() == ISD::SIGN_EXTEND ||
2139          N->getOpcode() == ISD::ZERO_EXTEND);
2140 
2141   // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if
2142   // possible before the extract_subvector can be expanded.
2143   auto Extract = N->getOperand(0);
2144   if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
2145     return SDValue();
2146   auto Source = Extract.getOperand(0);
2147   auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
2148   if (IndexNode == nullptr)
2149     return SDValue();
2150   auto Index = IndexNode->getZExtValue();
2151 
2152   // Only v8i8, v4i16, and v2i32 extracts can be widened, and only if the
2153   // extracted subvector is the low or high half of its source.
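  // For example (illustrative),
  //   (v8i16 (sign_extend (v8i8 (extract_subvector (v16i8 $x), 8))))
  // becomes an EXTEND_HIGH_S node, i.e. i16x8.extend_high_i8x16_s.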
2154   EVT ResVT = N->getValueType(0);
2155   if (ResVT == MVT::v8i16) {
2156     if (Extract.getValueType() != MVT::v8i8 ||
2157         Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8))
2158       return SDValue();
2159   } else if (ResVT == MVT::v4i32) {
2160     if (Extract.getValueType() != MVT::v4i16 ||
2161         Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4))
2162       return SDValue();
2163   } else if (ResVT == MVT::v2i64) {
2164     if (Extract.getValueType() != MVT::v2i32 ||
2165         Source.getValueType() != MVT::v4i32 || (Index != 0 && Index != 2))
2166       return SDValue();
2167   } else {
2168     return SDValue();
2169   }
2170 
2171   bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND;
2172   bool IsLow = Index == 0;
2173 
2174   unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::EXTEND_LOW_S
2175                                 : WebAssemblyISD::EXTEND_HIGH_S)
2176                        : (IsLow ? WebAssemblyISD::EXTEND_LOW_U
2177                                 : WebAssemblyISD::EXTEND_HIGH_U);
2178 
2179   return DAG.getNode(Op, SDLoc(N), ResVT, Source);
2180 }
2181 
2182 static SDValue
2183 performVectorConvertLowCombine(SDNode *N,
2184                                TargetLowering::DAGCombinerInfo &DCI) {
2185   auto &DAG = DCI.DAG;
2186 
2187   EVT ResVT = N->getValueType(0);
2188   if (ResVT != MVT::v2f64)
2189     return SDValue();
2190 
2191   auto GetWasmConversionOp = [](unsigned Op) {
2192     switch (Op) {
2193     case ISD::SINT_TO_FP:
2194       return WebAssemblyISD::CONVERT_LOW_S;
2195     case ISD::UINT_TO_FP:
2196       return WebAssemblyISD::CONVERT_LOW_U;
2197     case ISD::FP_EXTEND:
2198       return WebAssemblyISD::PROMOTE_LOW;
2199     }
2200     llvm_unreachable("unexpected op");
2201   };
2202 
2203   if (N->getOpcode() == ISD::EXTRACT_SUBVECTOR) {
2204     // Combine this:
2205     //
2206     //   (v2f64 (extract_subvector
2207     //     (v4f64 ({s,u}int_to_fp (v4i32 $x))), 0))
2208     //
2209     // into (f64x2.convert_low_i32x4_{s,u} $x).
2210     //
2211     // Or this:
2212     //
2213     //  (v2f64 (extract_subvector
2214     //    (v4f64 (fp_extend (v4f32 $x))), 0))
2215     //
2216     // into (f64x2.promote_low_f32x4 $x).
2217     auto Conversion = N->getOperand(0);
2218     auto ConversionOp = Conversion.getOpcode();
2219     MVT ExpectedSourceType;
2220     switch (ConversionOp) {
2221     case ISD::SINT_TO_FP:
2222     case ISD::UINT_TO_FP:
2223       ExpectedSourceType = MVT::v4i32;
2224       break;
2225     case ISD::FP_EXTEND:
2226       ExpectedSourceType = MVT::v4f32;
2227       break;
2228     default:
2229       return SDValue();
2230     }
2231 
2232     if (Conversion.getValueType() != MVT::v4f64)
2233       return SDValue();
2234 
2235     auto Source = Conversion.getOperand(0);
2236     if (Source.getValueType() != ExpectedSourceType)
2237       return SDValue();
2238 
2239     auto IndexNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
2240     if (IndexNode == nullptr || IndexNode->getZExtValue() != 0)
2241       return SDValue();
2242 
2243     auto Op = GetWasmConversionOp(ConversionOp);
2244     return DAG.getNode(Op, SDLoc(N), ResVT, Source);
2245   }
2246 
2247   // Combine this:
2248   //
2249   //   (v2f64 ({s,u}int_to_fp
2250   //     (v2i32 (extract_subvector (v4i32 $x), 0))))
2251   //
2252   // into (f64x2.convert_low_i32x4_{s,u} $x).
2253   //
2254   // Or this:
2255   //
2256   //   (v2f64 (fp_extend
2257   //     (v2f32 (extract_subvector (v4f32 $x), 0))))
2258   //
2259   // into (f64x2.promote_low_f32x4 $x).
2260   auto ConversionOp = N->getOpcode();
2261   MVT ExpectedExtractType;
2262   MVT ExpectedSourceType;
2263   switch (ConversionOp) {
2264   case ISD::SINT_TO_FP:
2265   case ISD::UINT_TO_FP:
2266     ExpectedExtractType = MVT::v2i32;
2267     ExpectedSourceType = MVT::v4i32;
2268     break;
2269   case ISD::FP_EXTEND:
2270     ExpectedExtractType = MVT::v2f32;
2271     ExpectedSourceType = MVT::v4f32;
2272     break;
2273   default:
2274     llvm_unreachable("unexpected opcode");
2275   }
2276 
2277   auto Extract = N->getOperand(0);
2278   if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
2279     return SDValue();
2280 
2281   if (Extract.getValueType() != ExpectedExtractType)
2282     return SDValue();
2283 
2284   auto Source = Extract.getOperand(0);
2285   if (Source.getValueType() != ExpectedSourceType)
2286     return SDValue();
2287 
2288   auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
2289   if (IndexNode == nullptr || IndexNode->getZExtValue() != 0)
2290     return SDValue();
2291 
2292   unsigned Op = GetWasmConversionOp(ConversionOp);
2293   return DAG.getNode(Op, SDLoc(N), ResVT, Source);
2294 }
2295 
2296 static SDValue
2297 performVectorTruncSatLowCombine(SDNode *N,
2298                                 TargetLowering::DAGCombinerInfo &DCI) {
2299   auto &DAG = DCI.DAG;
2300   assert(N->getOpcode() == ISD::CONCAT_VECTORS);
2301 
2302   // Combine this:
2303   //
2304   //   (concat_vectors (v2i32 (fp_to_{s,u}int_sat $x, 32)), (v2i32 (splat 0)))
2305   //
2306   // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
2307   EVT ResVT = N->getValueType(0);
2308   if (ResVT != MVT::v4i32)
2309     return SDValue();
2310 
2311   auto FPToInt = N->getOperand(0);
2312   auto FPToIntOp = FPToInt.getOpcode();
2313   if (FPToIntOp != ISD::FP_TO_SINT_SAT && FPToIntOp != ISD::FP_TO_UINT_SAT)
2314     return SDValue();
2315   if (cast<VTSDNode>(FPToInt.getOperand(1))->getVT() != MVT::i32)
2316     return SDValue();
2317 
2318   auto Source = FPToInt.getOperand(0);
2319   if (Source.getValueType() != MVT::v2f64)
2320     return SDValue();
2321 
2322   auto *Splat = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
2323   APInt SplatValue, SplatUndef;
2324   unsigned SplatBitSize;
2325   bool HasAnyUndefs;
2326   if (!Splat || !Splat->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
2327                                         HasAnyUndefs))
2328     return SDValue();
2329   if (SplatValue != 0)
2330     return SDValue();
2331 
2332   unsigned Op = FPToIntOp == ISD::FP_TO_SINT_SAT
2333                     ? WebAssemblyISD::TRUNC_SAT_ZERO_S
2334                     : WebAssemblyISD::TRUNC_SAT_ZERO_U;
2335 
2336   return DAG.getNode(Op, SDLoc(N), ResVT, Source);
2337 }
2338 
2339 SDValue
2340 WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
2341                                              DAGCombinerInfo &DCI) const {
2342   switch (N->getOpcode()) {
2343   default:
2344     return SDValue();
2345   case ISD::VECTOR_SHUFFLE:
2346     return performVECTOR_SHUFFLECombine(N, DCI);
2347   case ISD::SIGN_EXTEND:
2348   case ISD::ZERO_EXTEND:
2349     return performVectorExtendCombine(N, DCI);
2350   case ISD::SINT_TO_FP:
2351   case ISD::UINT_TO_FP:
2352   case ISD::FP_EXTEND:
2353   case ISD::EXTRACT_SUBVECTOR:
2354     return performVectorConvertLowCombine(N, DCI);
2355   case ISD::CONCAT_VECTORS:
2356     return performVectorTruncSatLowCombine(N, DCI);
2357   }
2358 }
2359