//=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the WebAssemblyTargetLowering class.
///
//===----------------------------------------------------------------------===//

#include "WebAssemblyISelLowering.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyTargetMachine.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/WasmEHFuncInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

#define DEBUG_TYPE "wasm-lower"

WebAssemblyTargetLowering::WebAssemblyTargetLowering(
    const TargetMachine &TM, const WebAssemblySubtarget &STI)
    : TargetLowering(TM), Subtarget(&STI) {
  auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;

  // Booleans always contain 0 or 1.
  setBooleanContents(ZeroOrOneBooleanContent);
  // Except in SIMD vectors
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  // We don't know the microarchitecture here, so just reduce register pressure.
  setSchedulingPreference(Sched::RegPressure);
  // Tell ISel that we have a stack pointer.
  setStackPointerRegisterToSaveRestore(
      Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
  // Set up the register classes.
  addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
  addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
  addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
  addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
  if (Subtarget->hasSIMD128()) {
    addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
  }
  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget->getRegisterInfo());

  setOperationAction(ISD::GlobalAddress, MVTPtr, Custom);
  setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom);
  setOperationAction(ISD::JumpTable, MVTPtr, Custom);
  setOperationAction(ISD::BlockAddress, MVTPtr, Custom);
  setOperationAction(ISD::BRIND, MVT::Other, Custom);

  // Take the default expansion for va_arg, va_copy, and va_end. There is no
  // default action for va_start, so we handle it with custom lowering.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
    // Don't expand the floating-point types to constant pools.
    setOperationAction(ISD::ConstantFP, T, Legal);
    // Expand floating-point comparisons.
    for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
                    ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
      setCondCodeAction(CC, T, Expand);
    // Expand floating-point library function operators.
    for (auto Op :
         {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FMA})
      setOperationAction(Op, T, Expand);
    // Mark the floating-point library function operators that WebAssembly
    // supports natively as Legal; they would otherwise default to Expand.
    for (auto Op :
         {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT})
      setOperationAction(Op, T, Legal);
    // Support minimum and maximum, which otherwise default to expand.
    setOperationAction(ISD::FMINIMUM, T, Legal);
    setOperationAction(ISD::FMAXIMUM, T, Legal);
    // WebAssembly currently has no builtin f16 support.
    setOperationAction(ISD::FP16_TO_FP, T, Expand);
    setOperationAction(ISD::FP_TO_FP16, T, Expand);
    setLoadExtAction(ISD::EXTLOAD, T, MVT::f16, Expand);
    setTruncStoreAction(T, MVT::f16, Expand);
  }

  // Expand unavailable integer operations.
  for (auto Op :
       {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU,
        ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS, ISD::SRA_PARTS,
        ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}) {
    for (auto T : {MVT::i32, MVT::i64})
      setOperationAction(Op, T, Expand);
    if (Subtarget->hasSIMD128())
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
        setOperationAction(Op, T, Expand);
  }

  // SIMD-specific configuration
  if (Subtarget->hasSIMD128()) {
    // Support saturating add for i8x16 and i16x8
    for (auto Op : {ISD::SADDSAT, ISD::UADDSAT})
      for (auto T : {MVT::v16i8, MVT::v8i16})
        setOperationAction(Op, T, Legal);

    // Support integer abs
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
      setOperationAction(ISD::ABS, T, Legal);

    // Custom lower BUILD_VECTORs to minimize number of replace_lanes
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
                   MVT::v2f64})
      setOperationAction(ISD::BUILD_VECTOR, T, Custom);

    // We have custom shuffle lowering to expose the shuffle mask
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
                   MVT::v2f64})
      setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);

    // Custom lowering since wasm shifts must have a scalar shift amount
    for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
        setOperationAction(Op, T, Custom);

    // Custom lower lane accesses to expand out variable indices
    for (auto Op : {ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT})
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
                     MVT::v2f64})
        setOperationAction(Op, T, Custom);

    // There is no i64x2.mul instruction
    // TODO: Actually, there is now. Implement it.
    setOperationAction(ISD::MUL, MVT::v2i64, Expand);

    // There are no vector select instructions
    for (auto Op : {ISD::VSELECT, ISD::SELECT_CC, ISD::SELECT})
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
                     MVT::v2f64})
        setOperationAction(Op, T, Expand);

    // Expand integer operations supported for scalars but not SIMD
    for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP, ISD::SDIV, ISD::UDIV,
                    ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR})
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
        setOperationAction(Op, T, Expand);

    // But we do have integer min and max operations
    for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
        setOperationAction(Op, T, Legal);

    // Expand float operations supported for scalars but not SIMD
    for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT,
                    ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
                    ISD::FEXP, ISD::FEXP2, ISD::FRINT})
      for (auto T : {MVT::v4f32, MVT::v2f64})
        setOperationAction(Op, T, Expand);

    // Expand operations not supported for i64x2 vectors
    for (unsigned CC = 0; CC < ISD::SETCC_INVALID; ++CC)
      setCondCodeAction(static_cast<ISD::CondCode>(CC), MVT::v2i64, Custom);

    // 64x2 conversions are not in the spec
    if (!Subtarget->hasUnimplementedSIMD128())
      for (auto Op :
           {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT})
        for (auto T : {MVT::v2i64, MVT::v2f64})
          setOperationAction(Op, T, Expand);
  }

  // As a special case, these operators use the type to mean the type to
  // sign-extend from.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  if (!Subtarget->hasSignExt()) {
    // Sign extends are legal only when extending a vector extract
    auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
    for (auto T : {MVT::i8, MVT::i16, MVT::i32})
      setOperationAction(ISD::SIGN_EXTEND_INREG, T, Action);
  }
  for (auto T : MVT::integer_fixedlen_vector_valuetypes())
    setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand);

  // Dynamic stack allocation: use the default expansion.
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand);

  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
  setOperationAction(ISD::CopyToReg, MVT::Other, Custom);

  // Expand these forms; we pattern-match the forms that we can handle in isel.
  for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
    for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
      setOperationAction(Op, T, Expand);

  // We have custom switch handling.
  setOperationAction(ISD::BR_JT, MVT::Other, Custom);

  // WebAssembly doesn't have:
  //  - Floating-point extending loads.
  //  - Floating-point truncating stores.
  //  - i1 extending loads.
  //  - truncating SIMD stores and most extending loads
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  for (auto T : MVT::integer_valuetypes())
    for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
      setLoadExtAction(Ext, T, MVT::i1, Promote);
  if (Subtarget->hasSIMD128()) {
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
                   MVT::v2f64}) {
      for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
        if (MVT(T) != MemT) {
          setTruncStoreAction(T, MemT, Expand);
          for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
            setLoadExtAction(Ext, T, MemT, Expand);
        }
      }
    }
    // But some vector extending loads are legal
    if (Subtarget->hasUnimplementedSIMD128()) {
      for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
        setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
        setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
        setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
      }
    }
  }

  // Don't do anything clever with build_pairs
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // Trap lowers to wasm unreachable
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // Exception handling intrinsics
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

  setMaxAtomicSizeInBitsSupported(64);

  // Override the __gnu_f2h_ieee/__gnu_h2f_ieee names so that the f32 name is
  // consistent with the f64 and f128 names.
  setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
  setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");

  // Define the emscripten name for return address helper.
  // TODO: when implementing other WASM backends, make this generic or only do
  // this on emscripten depending on what they end up doing.
  setLibcallName(RTLIB::RETURN_ADDRESS, "emscripten_return_address");

  // Always convert switches to br_tables unless there is only one case, which
  // is equivalent to a simple branch. This reduces code size for wasm, and we
  // defer possible jump table optimizations to the VM.
  setMinimumJumpTableEntries(2);
}

TargetLowering::AtomicExpansionKind
WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // We have wasm instructions for these
  switch (AI->getOperation()) {
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::And:
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
  case AtomicRMWInst::Xchg:
    return AtomicExpansionKind::None;
  default:
    break;
  }
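  // Everything else (e.g. Nand and the min/max variants) is expanded by
  // AtomicExpandPass into a compare-exchange loop.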
  return AtomicExpansionKind::CmpXChg;
}

FastISel *WebAssemblyTargetLowering::createFastISel(
    FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
  return WebAssembly::createFastISel(FuncInfo, LibInfo);
}

MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
                                                      EVT VT) const {
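  // Round the bit width up to a power of two (NextPowerOf2(N - 1) is the
  // smallest power of two >= N), then round widths between 2 and 7 up to the
  // minimum integer type size of 8 bits.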
  unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
  if (BitWidth > 1 && BitWidth < 8)
    BitWidth = 8;

  if (BitWidth > 64) {
    // The shift will be lowered to a libcall, and compiler-rt libcalls expect
    // the count to be an i32.
    BitWidth = 32;
    assert(BitWidth >= Log2_32_Ceil(VT.getSizeInBits()) &&
           "32-bit shift counts ought to be enough for anyone");
  }

  MVT Result = MVT::getIntegerVT(BitWidth);
  assert(Result != MVT::INVALID_SIMPLE_VALUE_TYPE &&
         "Unable to represent scalar shift amount type");
  return Result;
}

// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
// undefined result on invalid/overflow, to the WebAssembly opcode, which
// traps on invalid/overflow.
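// The emitted machine code forms a diamond. Roughly, for the signed case:
//
//   BB:       Tmp0 = fabs(In)             ; skipped when unsigned
//             CmpReg = Tmp0 < CmpVal      ; in-range check (false for NaN)
//             br_if TrueMBB, eqz(CmpReg)
//   FalseMBB: FalseReg = trunc(In)        ; the real (trapping) conversion
//             br DoneMBB
//   TrueMBB:  TrueReg = Substitute        ; out-of-range or NaN result
//   DoneMBB:  Out = phi(FalseReg, TrueReg)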
static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
                                       MachineBasicBlock *BB,
                                       const TargetInstrInfo &TII,
                                       bool IsUnsigned, bool Int64,
                                       bool Float64, unsigned LoweredOpcode) {
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();

  Register OutReg = MI.getOperand(0).getReg();
  Register InReg = MI.getOperand(1).getReg();

  unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
  unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
  unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
  unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
  unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
  unsigned Eqz = WebAssembly::EQZ_I32;
  unsigned And = WebAssembly::AND_I32;
  int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
  int64_t Substitute = IsUnsigned ? 0 : Limit;
  double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
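  // Note that -(double)INT32_MIN == 2^31 and -(double)INT64_MIN == 2^63 are
  // both exactly representable as doubles, so CmpVal is the first value just
  // above the representable range (2^31/2^63 signed, 2^32/2^64 unsigned).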
  auto &Context = BB->getParent()->getFunction().getContext();
  Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);

  const BasicBlock *LLVMBB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
  MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB);
  MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);

  MachineFunction::iterator It = ++BB->getIterator();
  F->insert(It, FalseMBB);
  F->insert(It, TrueMBB);
  F->insert(It, DoneMBB);

  // Transfer the remainder of BB and its successor edges to DoneMBB.
  DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);

  BB->addSuccessor(TrueMBB);
  BB->addSuccessor(FalseMBB);
  TrueMBB->addSuccessor(DoneMBB);
  FalseMBB->addSuccessor(DoneMBB);

  unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
  Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
  Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
  CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
  TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));

  MI.eraseFromParent();
  // For signed numbers, we can do a single comparison to determine whether
  // fabs(x) is within range.
  if (IsUnsigned) {
    Tmp0 = InReg;
  } else {
    BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
  }
  BuildMI(BB, DL, TII.get(FConst), Tmp1)
      .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
  BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);

  // For unsigned numbers, we have to do a separate comparison with zero.
  if (IsUnsigned) {
    Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
    Register SecondCmpReg =
        MRI.createVirtualRegister(&WebAssembly::I32RegClass);
    Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
    BuildMI(BB, DL, TII.get(FConst), Tmp1)
        .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
    BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
    BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
    CmpReg = AndReg;
  }

  BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);

  // Create the CFG diamond to select between doing the conversion or using
  // the substitute value.
  BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
  BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
  BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
  BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
  BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
      .addReg(FalseReg)
      .addMBB(FalseMBB)
      .addReg(TrueReg)
      .addMBB(TrueMBB);

  return DoneMBB;
}

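// A call is selected into a CALL_PARAMS / CALL_RESULTS (or RET_CALL_RESULTS)
// pseudo-instruction pair; this lowering fuses the pair into a single
// variadic CALL-family instruction: the results' defs first, then (for
// indirect calls) placeholder type-index and flags immediates, then the
// arguments, with the callee pointer moved to the end.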
static MachineBasicBlock *LowerCallResults(MachineInstr &CallResults,
                                           DebugLoc DL, MachineBasicBlock *BB,
                                           const TargetInstrInfo &TII) {
  MachineInstr &CallParams = *CallResults.getPrevNode();
  assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS);
  assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS ||
         CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS);

  bool IsIndirect = CallParams.getOperand(0).isReg();
  bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS;

  unsigned CallOp;
  if (IsIndirect && IsRetCall) {
    CallOp = WebAssembly::RET_CALL_INDIRECT;
  } else if (IsIndirect) {
    CallOp = WebAssembly::CALL_INDIRECT;
  } else if (IsRetCall) {
    CallOp = WebAssembly::RET_CALL;
  } else {
    CallOp = WebAssembly::CALL;
  }

  MachineFunction &MF = *BB->getParent();
  const MCInstrDesc &MCID = TII.get(CallOp);
  MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));

  // Move the function pointer to the end of the arguments for indirect calls
  if (IsIndirect) {
    auto FnPtr = CallParams.getOperand(0);
    CallParams.RemoveOperand(0);
    CallParams.addOperand(FnPtr);
  }

  for (auto Def : CallResults.defs())
    MIB.add(Def);

  // Add placeholders for the type index and immediate flags
  if (IsIndirect) {
    MIB.addImm(0);
    MIB.addImm(0);
  }

  for (auto Use : CallParams.uses())
    MIB.add(Use);

  BB->insert(CallResults.getIterator(), MIB);
  CallParams.eraseFromParent();
  CallResults.eraseFromParent();

  return BB;
}

MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
    MachineInstr &MI, MachineBasicBlock *BB) const {
  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();

  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected instr type to insert");
  case WebAssembly::FP_TO_SINT_I32_F32:
    return LowerFPToInt(MI, DL, BB, TII, false, false, false,
                        WebAssembly::I32_TRUNC_S_F32);
  case WebAssembly::FP_TO_UINT_I32_F32:
    return LowerFPToInt(MI, DL, BB, TII, true, false, false,
                        WebAssembly::I32_TRUNC_U_F32);
  case WebAssembly::FP_TO_SINT_I64_F32:
    return LowerFPToInt(MI, DL, BB, TII, false, true, false,
                        WebAssembly::I64_TRUNC_S_F32);
  case WebAssembly::FP_TO_UINT_I64_F32:
    return LowerFPToInt(MI, DL, BB, TII, true, true, false,
                        WebAssembly::I64_TRUNC_U_F32);
  case WebAssembly::FP_TO_SINT_I32_F64:
    return LowerFPToInt(MI, DL, BB, TII, false, false, true,
                        WebAssembly::I32_TRUNC_S_F64);
  case WebAssembly::FP_TO_UINT_I32_F64:
    return LowerFPToInt(MI, DL, BB, TII, true, false, true,
                        WebAssembly::I32_TRUNC_U_F64);
  case WebAssembly::FP_TO_SINT_I64_F64:
    return LowerFPToInt(MI, DL, BB, TII, false, true, true,
                        WebAssembly::I64_TRUNC_S_F64);
  case WebAssembly::FP_TO_UINT_I64_F64:
    return LowerFPToInt(MI, DL, BB, TII, true, true, true,
                        WebAssembly::I64_TRUNC_U_F64);
  case WebAssembly::CALL_RESULTS:
  case WebAssembly::RET_CALL_RESULTS:
    return LowerCallResults(MI, DL, BB, TII);
  }
}

const char *
WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) {
  case WebAssemblyISD::FIRST_NUMBER:
  case WebAssemblyISD::FIRST_MEM_OPCODE:
    break;
#define HANDLE_NODETYPE(NODE)                                                  \
  case WebAssemblyISD::NODE:                                                   \
    return "WebAssemblyISD::" #NODE;
#define HANDLE_MEM_NODETYPE(NODE) HANDLE_NODETYPE(NODE)
#include "WebAssemblyISD.def"
#undef HANDLE_MEM_NODETYPE
#undef HANDLE_NODETYPE
  }
  return nullptr;
}

std::pair<unsigned, const TargetRegisterClass *>
WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // WebAssembly register class.
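  // For example, "r"(int) selects I32, "r"(long long) selects I64, and a
  // 128-bit vector type selects V128 when SIMD is enabled.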
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      assert(VT != MVT::iPTR && "Pointer MVT not expected here");
      if (Subtarget->hasSIMD128() && VT.isVector()) {
        if (VT.getSizeInBits() == 128)
          return std::make_pair(0U, &WebAssembly::V128RegClass);
      }
      if (VT.isInteger() && !VT.isVector()) {
        if (VT.getSizeInBits() <= 32)
          return std::make_pair(0U, &WebAssembly::I32RegClass);
        if (VT.getSizeInBits() <= 64)
          return std::make_pair(0U, &WebAssembly::I64RegClass);
      }
      break;
    default:
      break;
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

bool WebAssemblyTargetLowering::isCheapToSpeculateCttz() const {
  // Assume ctz is a relatively cheap operation.
  return true;
}

bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const {
  // Assume clz is a relatively cheap operation.
  return true;
}

bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                      const AddrMode &AM,
                                                      Type *Ty, unsigned AS,
                                                      Instruction *I) const {
  // WebAssembly offsets are added as unsigned without wrapping. The
  // isLegalAddressingMode interface gives us no way to determine whether
  // wrapping could occur, so we approximate by accepting only non-negative
  // offsets.
  if (AM.BaseOffs < 0)
    return false;

  // WebAssembly has no scale register operands.
  if (AM.Scale != 0)
    return false;
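  // This leaves only the reg and reg+imm (imm >= 0) forms, which map directly
  // onto wasm's base-address-plus-unsigned-offset load/store addressing.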

  // Everything else is legal.
  return true;
}

bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
    EVT /*VT*/, unsigned /*AddrSpace*/, unsigned /*Align*/,
    MachineMemOperand::Flags /*Flags*/, bool *Fast) const {
  // WebAssembly supports unaligned accesses, though loads and stores that
  // perform them should declare it with the p2align attribute, and there may
  // be a performance impact. We tell LLVM they're "fast" because for the
  // kinds of things LLVM uses this for (merging adjacent stores of constants,
  // etc.), WebAssembly implementations will either want the unaligned access
  // or split it anyway.
  if (Fast)
    *Fast = true;
  return true;
}

bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
                                              AttributeList Attr) const {
  // The current thinking is that wasm engines will perform this optimization,
  // so we can save on code size.
  return true;
}

bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
  if (!Subtarget->hasUnimplementedSIMD128())
    return false;
  MVT ExtT = ExtVal.getSimpleValueType();
  MVT MemT = cast<LoadSDNode>(ExtVal->getOperand(0))->getSimpleValueType(0);
  return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
         (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
         (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
}

EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
                                                  LLVMContext &C,
                                                  EVT VT) const {
  if (VT.isVector())
    return VT.changeVectorElementTypeToInteger();

  return TargetLowering::getSetCCResultType(DL, C, VT);
}

bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                                   const CallInst &I,
                                                   MachineFunction &MF,
                                                   unsigned Intrinsic) const {
  switch (Intrinsic) {
  case Intrinsic::wasm_atomic_notify:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    // The atomic.notify instruction does not really load from the memory
    // specified by this argument, but a MachineMemOperand must be either a
    // load or a store, so we mark this as a load.
    // FIXME Volatile isn't really correct, but currently all LLVM atomic
    // instructions are treated as volatile in the backend, so we should be
    // consistent. The same applies to the wasm_atomic_wait intrinsics too.
    Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
    return true;
  case Intrinsic::wasm_atomic_wait_i32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
    return true;
  case Intrinsic::wasm_atomic_wait_i64:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i64;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(8);
    Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
    return true;
  default:
    return false;
  }
}

//===----------------------------------------------------------------------===//
// WebAssembly Lowering private implementation.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//

static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
  MachineFunction &MF = DAG.getMachineFunction();
  DAG.getContext()->diagnose(
      DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
}

// Test whether the given calling convention is supported.
static bool callingConvSupported(CallingConv::ID CallConv) {
  // We currently support the language-independent, target-independent
  // conventions. We don't yet have a way to annotate calls with properties
  // like "cold", and we don't have any call-clobbered registers, so these are
  // mostly all handled the same.
  return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
         CallConv == CallingConv::Cold ||
         CallConv == CallingConv::PreserveMost ||
         CallConv == CallingConv::PreserveAll ||
         CallConv == CallingConv::CXX_FAST_TLS ||
         CallConv == CallingConv::WASM_EmscriptenInvoke ||
         CallConv == CallingConv::Swift;
}

SDValue
WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                     SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc DL = CLI.DL;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  MachineFunction &MF = DAG.getMachineFunction();
  auto Layout = MF.getDataLayout();

  CallingConv::ID CallConv = CLI.CallConv;
  if (!callingConvSupported(CallConv))
    fail(DL, DAG,
         "WebAssembly doesn't support language-specific or target-specific "
         "calling conventions yet");
  if (CLI.IsPatchPoint)
    fail(DL, DAG, "WebAssembly doesn't support patch point yet");

  if (CLI.IsTailCall) {
    auto NoTail = [&](const char *Msg) {
      if (CLI.CB && CLI.CB->isMustTailCall())
        fail(DL, DAG, Msg);
      CLI.IsTailCall = false;
    };

    if (!Subtarget->hasTailCall())
      NoTail("WebAssembly 'tail-call' feature not enabled");

    // Varargs calls cannot be tail calls because the buffer is on the stack
    if (CLI.IsVarArg)
      NoTail("WebAssembly does not support varargs tail calls");

    // Do not tail call unless caller and callee return types match
    const Function &F = MF.getFunction();
    const TargetMachine &TM = getTargetMachine();
    Type *RetTy = F.getReturnType();
    SmallVector<MVT, 4> CallerRetTys;
    SmallVector<MVT, 4> CalleeRetTys;
    computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
    computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
    bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
                      std::equal(CallerRetTys.begin(), CallerRetTys.end(),
                                 CalleeRetTys.begin());
    if (!TypesMatch)
      NoTail("WebAssembly tail call requires caller and callee return types to "
             "match");

    // If pointers to local stack values are passed, we cannot tail call
    if (CLI.CB) {
      for (auto &Arg : CLI.CB->args()) {
        Value *Val = Arg.get();
        // Trace the value back through pointer operations
        while (true) {
          Value *Src = Val->stripPointerCastsAndAliases();
          if (auto *GEP = dyn_cast<GetElementPtrInst>(Src))
            Src = GEP->getPointerOperand();
          if (Val == Src)
            break;
          Val = Src;
        }
        if (isa<AllocaInst>(Val)) {
          NoTail(
              "WebAssembly does not support tail calling with stack arguments");
          break;
        }
      }
    }
  }

  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;

  // The generic code may have added an sret argument. If we're lowering an
  // invoke function, the ABI requires that the function pointer be the first
  // argument, so we may have to swap the arguments.
  if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
      Outs[0].Flags.isSRet()) {
    std::swap(Outs[0], Outs[1]);
    std::swap(OutVals[0], OutVals[1]);
  }

  bool HasSwiftSelfArg = false;
  bool HasSwiftErrorArg = false;
  unsigned NumFixedArgs = 0;
  for (unsigned I = 0; I < Outs.size(); ++I) {
    const ISD::OutputArg &Out = Outs[I];
    SDValue &OutVal = OutVals[I];
    HasSwiftSelfArg |= Out.Flags.isSwiftSelf();
    HasSwiftErrorArg |= Out.Flags.isSwiftError();
    if (Out.Flags.isNest())
      fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
    if (Out.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
    if (Out.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
    if (Out.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
    if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
      auto &MFI = MF.getFrameInfo();
      int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
                                     Out.Flags.getNonZeroByValAlign(),
                                     /*isSS=*/false);
      SDValue SizeNode =
          DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
      SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
      Chain = DAG.getMemcpy(
          Chain, DL, FINode, OutVal, SizeNode, Out.Flags.getNonZeroByValAlign(),
          /*isVolatile*/ false, /*AlwaysInline=*/false,
          /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
      OutVal = FINode;
    }
    // Count the number of fixed args *after* legalization.
    NumFixedArgs += Out.IsFixed;
  }

  bool IsVarArg = CLI.IsVarArg;
  auto PtrVT = getPointerTy(Layout);

  // For swiftcc, emit additional swiftself and swifterror arguments if they
  // are not present. These additional arguments are also added to the callee
  // signature; they are necessary to match caller and callee signatures for
  // indirect calls.
  if (CallConv == CallingConv::Swift) {
    if (!HasSwiftSelfArg) {
      NumFixedArgs++;
      ISD::OutputArg Arg;
      Arg.Flags.setSwiftSelf();
      CLI.Outs.push_back(Arg);
      SDValue ArgVal = DAG.getUNDEF(PtrVT);
      CLI.OutVals.push_back(ArgVal);
    }
    if (!HasSwiftErrorArg) {
      NumFixedArgs++;
      ISD::OutputArg Arg;
      Arg.Flags.setSwiftError();
      CLI.Outs.push_back(Arg);
      SDValue ArgVal = DAG.getUNDEF(PtrVT);
      CLI.OutVals.push_back(ArgVal);
    }
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (IsVarArg) {
    // Outgoing non-fixed arguments are placed in a buffer. First
    // compute their offsets and the total amount of buffer space needed.
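    // For example, non-fixed (i32, f64) arguments get offsets 0 and 8
    // respectively, with the f64 placed at its ABI alignment of 8, giving a
    // 16-byte buffer.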
    for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
      const ISD::OutputArg &Out = Outs[I];
      SDValue &Arg = OutVals[I];
      EVT VT = Arg.getValueType();
      assert(VT != MVT::iPTR && "Legalized args should be concrete");
      Type *Ty = VT.getTypeForEVT(*DAG.getContext());
      unsigned Align = std::max(Out.Flags.getOrigAlign(),
                                Layout.getABITypeAlignment(Ty));
      unsigned Offset = CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty),
                                             Align);
      CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
                                        Offset, VT.getSimpleVT(),
                                        CCValAssign::Full));
    }
  }

  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();

  SDValue FINode;
  if (IsVarArg && NumBytes) {
    // For non-fixed arguments, next emit stores to store the argument values
    // to the stack buffer at the offsets computed above.
    int FI = MF.getFrameInfo().CreateStackObject(NumBytes,
                                                 Layout.getStackAlignment(),
                                                 /*isSS=*/false);
    unsigned ValNo = 0;
    SmallVector<SDValue, 8> Chains;
    for (SDValue Arg :
         make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) {
      assert(ArgLocs[ValNo].getValNo() == ValNo &&
             "ArgLocs should remain in order and only hold varargs args");
      unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
      FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
      SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
                                DAG.getConstant(Offset, DL, PtrVT));
      Chains.push_back(
          DAG.getStore(Chain, DL, Arg, Add,
                       MachinePointerInfo::getFixedStack(MF, FI, Offset), 0));
    }
    if (!Chains.empty())
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  } else if (IsVarArg) {
    FINode = DAG.getIntPtrConstant(0, DL);
  }

  if (Callee->getOpcode() == ISD::GlobalAddress) {
    // If the callee is a GlobalAddress node (quite common, every direct call
    // is) turn it into a TargetGlobalAddress node so that LowerGlobalAddress
    // doesn't add MO_GOT, which is not needed for direct calls.
    GlobalAddressSDNode* GA = cast<GlobalAddressSDNode>(Callee);
    Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
                                        getPointerTy(DAG.getDataLayout()),
                                        GA->getOffset());
    Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
                         getPointerTy(DAG.getDataLayout()), Callee);
  }

  // Compute the operands for the CALLn node.
  SmallVector<SDValue, 16> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
  // isn't reliable.
  Ops.append(OutVals.begin(),
             IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
  // Add a pointer to the vararg buffer.
  if (IsVarArg)
    Ops.push_back(FINode);

  SmallVector<EVT, 8> InTys;
  for (const auto &In : Ins) {
    assert(!In.Flags.isByVal() && "byval is not valid for return values");
    assert(!In.Flags.isNest() && "nest is not valid for return values");
    if (In.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
    if (In.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
    if (In.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG,
           "WebAssembly hasn't implemented cons regs last return values");
    // Ignore In.getOrigAlign() because all our arguments are passed in
    // registers.
    InTys.push_back(In.VT);
  }

  if (CLI.IsTailCall) {
    // ret_calls do not return values to the current frame
    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
    return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
  }

  InTys.push_back(MVT::Other);
  SDVTList InTyList = DAG.getVTList(InTys);
  SDValue Res = DAG.getNode(WebAssemblyISD::CALL, DL, InTyList, Ops);

  for (size_t I = 0; I < Ins.size(); ++I)
    InVals.push_back(Res.getValue(I));

  // Return the chain
  return Res.getValue(Ins.size());
}

bool WebAssemblyTargetLowering::CanLowerReturn(
    CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    LLVMContext & /*Context*/) const {
  // WebAssembly can only handle returning tuples with multivalue enabled
  return Subtarget->hasMultivalue() || Outs.size() <= 1;
}

SDValue WebAssemblyTargetLowering::LowerReturn(
    SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
    SelectionDAG &DAG) const {
  assert((Subtarget->hasMultivalue() || Outs.size() <= 1) &&
         "MVP WebAssembly can only return up to one value");
  if (!callingConvSupported(CallConv))
    fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");

  SmallVector<SDValue, 4> RetOps(1, Chain);
  RetOps.append(OutVals.begin(), OutVals.end());
  Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);

  // Record the number and types of the return values.
  for (const ISD::OutputArg &Out : Outs) {
    assert(!Out.Flags.isByVal() && "byval is not valid for return values");
    assert(!Out.Flags.isNest() && "nest is not valid for return values");
    assert(Out.IsFixed && "non-fixed return value is not valid");
    if (Out.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
    if (Out.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
    if (Out.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
  }

  return Chain;
}

SDValue WebAssemblyTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  if (!callingConvSupported(CallConv))
    fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");

  MachineFunction &MF = DAG.getMachineFunction();
  auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();

  // Set up the incoming ARGUMENTS value, which serves to represent the liveness
  // of the incoming values before they're represented by virtual registers.
  MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);

  bool HasSwiftErrorArg = false;
  bool HasSwiftSelfArg = false;
  for (const ISD::InputArg &In : Ins) {
    HasSwiftSelfArg |= In.Flags.isSwiftSelf();
    HasSwiftErrorArg |= In.Flags.isSwiftError();
    if (In.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
    if (In.Flags.isNest())
      fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
    if (In.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
    if (In.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
    // Ignore In.getOrigAlign() because all our arguments are passed in
    // registers.
    InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
                                           DAG.getTargetConstant(InVals.size(),
                                                                 DL, MVT::i32))
                             : DAG.getUNDEF(In.VT));

    // Record the number and types of arguments.
    MFI->addParam(In.VT);
  }

  // For swiftcc, emit additional swiftself and swifterror arguments if they
  // are not present. These additional arguments are also added to the callee
  // signature; they are necessary to match caller and callee signatures for
  // indirect calls.
  auto PtrVT = getPointerTy(MF.getDataLayout());
  if (CallConv == CallingConv::Swift) {
    if (!HasSwiftSelfArg) {
      MFI->addParam(PtrVT);
    }
    if (!HasSwiftErrorArg) {
      MFI->addParam(PtrVT);
    }
  }
  // Varargs are copied into a buffer allocated by the caller, and a pointer to
  // the buffer is passed as an argument.
  if (IsVarArg) {
    MVT PtrVT = getPointerTy(MF.getDataLayout());
    Register VarargVreg =
        MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrVT));
    MFI->setVarargBufferVreg(VarargVreg);
    Chain = DAG.getCopyToReg(
        Chain, DL, VarargVreg,
        DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
                    DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
    MFI->addParam(PtrVT);
  }

  // Record the number and types of arguments and results.
  SmallVector<MVT, 4> Params;
  SmallVector<MVT, 4> Results;
  computeSignatureVTs(MF.getFunction().getFunctionType(), &MF.getFunction(),
                      MF.getFunction(), DAG.getTarget(), Params, Results);
  for (MVT VT : Results)
    MFI->addResult(VT);
  // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
  // the param logic here with computeSignatureVTs.
  assert(MFI->getParams().size() == Params.size() &&
         std::equal(MFI->getParams().begin(), MFI->getParams().end(),
                    Params.begin()));

  return Chain;
}

void WebAssemblyTargetLowering::ReplaceNodeResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  case ISD::SIGN_EXTEND_INREG:
    // Do not add any results, signifying that N should not be custom lowered
    // after all. This happens because simd128 turns on custom lowering for
    // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
    // illegal type.
    break;
  default:
    llvm_unreachable(
        "ReplaceNodeResults not implemented for this op for WebAssembly!");
  }
}

//===----------------------------------------------------------------------===//
//  Custom lowering hooks.
//===----------------------------------------------------------------------===//

SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("unimplemented operation lowering");
    return SDValue();
  case ISD::FrameIndex:
    return LowerFrameIndex(Op, DAG);
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::ExternalSymbol:
    return LowerExternalSymbol(Op, DAG);
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG);
  case ISD::BR_JT:
    return LowerBR_JT(Op, DAG);
  case ISD::VASTART:
    return LowerVASTART(Op, DAG);
  case ISD::BlockAddress:
  case ISD::BRIND:
    fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
    return SDValue();
  case ISD::RETURNADDR:
    return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:
    return LowerFRAMEADDR(Op, DAG);
  case ISD::CopyToReg:
    return LowerCopyToReg(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
  case ISD::INSERT_VECTOR_ELT:
    return LowerAccessVectorElement(Op, DAG);
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_WO_CHAIN:
  case ISD::INTRINSIC_W_CHAIN:
    return LowerIntrinsic(Op, DAG);
  case ISD::SIGN_EXTEND_INREG:
    return LowerSIGN_EXTEND_INREG(Op, DAG);
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::SETCC:
    return LowerSETCC(Op, DAG);
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    return LowerShift(Op, DAG);
  }
}

SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDValue Src = Op.getOperand(2);
  if (isa<FrameIndexSDNode>(Src.getNode())) {
    // CopyToReg nodes don't support FrameIndex operands. Other targets select
    // the FI to some LEA-like instruction, but since we don't have that, we
    // need to insert some kind of instruction that can take an FI operand and
    // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
    // local.copy between Op and its FI operand.
    SDValue Chain = Op.getOperand(0);
    SDLoc DL(Op);
    unsigned Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
    EVT VT = Src.getValueType();
    SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
                                                   : WebAssembly::COPY_I64,
                                    DL, VT, Src),
                 0);
    return Op.getNode()->getNumValues() == 1
               ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
               : DAG.getCopyToReg(Chain, DL, Reg, Copy,
                                  Op.getNumOperands() == 4 ? Op.getOperand(3)
                                                           : SDValue());
  }
  return SDValue();
}

SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
                                                   SelectionDAG &DAG) const {
  int FI = cast<FrameIndexSDNode>(Op)->getIndex();
  return DAG.getTargetFrameIndex(FI, Op.getValueType());
}

SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);

  if (!Subtarget->getTargetTriple().isOSEmscripten()) {
    fail(DL, DAG,
         "Non-Emscripten WebAssembly hasn't implemented "
         "__builtin_return_address");
    return SDValue();
  }

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  MakeLibCallOptions CallOptions;
  return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
                     {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
      .first;
}

SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // Non-zero depths are not supported by WebAssembly currently. Use the
  // legalizer's default expansion, which is to return 0 (what this function is
  // documented to do).
  if (Op.getConstantOperandVal(0) > 0)
    return SDValue();

  DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
  EVT VT = Op.getValueType();
  Register FP =
      Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
  return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
}

SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const auto *GA = cast<GlobalAddressSDNode>(Op);
  EVT VT = Op.getValueType();
  assert(GA->getTargetFlags() == 0 &&
         "Unexpected target flags on generic GlobalAddressSDNode");
  if (GA->getAddressSpace() != 0)
    fail(DL, DAG, "WebAssembly only expects the 0 address space");

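  // For position-independent code, DSO-local symbols are addressed as a
  // relocatable offset from __table_base (functions) or __memory_base (data),
  // while non-local symbols go through a GOT entry.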
1209   unsigned OperandFlags = 0;
1210   if (isPositionIndependent()) {
1211     const GlobalValue *GV = GA->getGlobal();
1212     if (getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) {
1213       MachineFunction &MF = DAG.getMachineFunction();
1214       MVT PtrVT = getPointerTy(MF.getDataLayout());
1215       const char *BaseName;
1216       if (GV->getValueType()->isFunctionTy()) {
1217         BaseName = MF.createExternalSymbolName("__table_base");
1218         OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL;
1219       }
1220       else {
1221         BaseName = MF.createExternalSymbolName("__memory_base");
1222         OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL;
1223       }
1224       SDValue BaseAddr =
1225           DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
1226                       DAG.getTargetExternalSymbol(BaseName, PtrVT));
1227 
      SDValue SymAddr = DAG.getNode(
          WebAssemblyISD::WrapperPIC, DL, VT,
          DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
                                     OperandFlags));

      return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
    } else {
      OperandFlags = WebAssemblyII::MO_GOT;
    }
  }

  return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
                     DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
                                                GA->getOffset(), OperandFlags));
}

SDValue
WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const auto *ES = cast<ExternalSymbolSDNode>(Op);
  EVT VT = Op.getValueType();
  assert(ES->getTargetFlags() == 0 &&
         "Unexpected target flags on generic ExternalSymbolSDNode");
  return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
                     DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
}

SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // There's no need for a Wrapper node because we always incorporate a jump
  // table operand into a BR_TABLE instruction, rather than ever
  // materializing it in a register.
  const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
                                JT->getTargetFlags());
}

SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Chain = Op.getOperand(0);
  const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
  SDValue Index = Op.getOperand(2);
  assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Index);

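  // A BR_TABLE node takes the chain, the index, one successor basic block per
  // jump table entry, and finally a default destination.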
  MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
  const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;

  // Add an operand for each case.
  for (auto MBB : MBBs)
    Ops.push_back(DAG.getBasicBlock(MBB));

  // TODO: For now, we just pick something arbitrary for a default case. We
  // really want to sniff out the guard and put in the real default case (and
  // delete the guard).
  Ops.push_back(DAG.getBasicBlock(MBBs[0]));

  return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
}

SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());

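  // Argument lowering stashed a pointer to the vararg buffer in a virtual
  // register; va_start just stores that pointer into the caller-provided
  // va_list object.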
  auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
                                    MFI->getVarargBufferVreg(), PtrVT);
  return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
                      MachinePointerInfo(SV), 0);
}

SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
                                                  SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned IntNo;
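  // Chained intrinsics carry their chain as operand 0, so the intrinsic ID is
  // operand 1; chainless intrinsics have the ID as operand 0.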
  switch (Op.getOpcode()) {
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN:
    IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    break;
  case ISD::INTRINSIC_WO_CHAIN:
    IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    break;
  default:
    llvm_unreachable("Invalid intrinsic");
  }
  SDLoc DL(Op);

  switch (IntNo) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.

  case Intrinsic::wasm_lsda: {
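    // Return the address of this function's language-specific data area. The
    // symbol name must match the GCC_except_table<function number> label used
    // when the exception table itself is emitted.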
    EVT VT = Op.getValueType();
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
    auto &Context = MF.getMMI().getContext();
    MCSymbol *S = Context.getOrCreateSymbol(Twine("GCC_except_table") +
                                            Twine(MF.getFunctionNumber()));
    return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
                       DAG.getMCSymbol(S, PtrVT));
  }

  case Intrinsic::wasm_throw: {
    // We only support C++ exceptions for now.
    int Tag = cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
    if (Tag != CPP_EXCEPTION)
      llvm_unreachable("Invalid tag!");
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
    const char *SymName = MF.createExternalSymbolName("__cpp_exception");
    SDValue SymNode = DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
                                  DAG.getTargetExternalSymbol(SymName, PtrVT));
    return DAG.getNode(WebAssemblyISD::THROW, DL,
                       MVT::Other, // outchain type
                       {
                           Op.getOperand(0), // inchain
                           SymNode,          // exception symbol
                           Op.getOperand(3)  // thrown value
                       });
  }
  }
}

SDValue
WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // If sign extension operations are disabled, allow sext_inreg only if operand
  // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign
  // extension operations, but allowing sext_inreg in this context lets us have
  // simple patterns to select extract_lane_s instructions. Expanding sext_inreg
  // everywhere would be simpler in this file, but would necessitate large and
  // brittle patterns to undo the expansion and select extract_lane_s
  // instructions.
  assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
  if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    return SDValue();

  const SDValue &Extract = Op.getOperand(0);
  MVT VecT = Extract.getOperand(0).getSimpleValueType();
  if (VecT.getVectorElementType().getSizeInBits() > 32)
    return SDValue();
  MVT ExtractedLaneT = static_cast<VTSDNode *>(Op.getOperand(1).getNode())
                           ->getVT()
                           .getSimpleVT();
  MVT ExtractedVecT =
      MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
  if (ExtractedVecT == VecT)
    return Op;

  // Bitcast vector to appropriate type to ensure ISel pattern coverage
  const SDValue &Index = Extract.getOperand(1);
  unsigned IndexVal =
      static_cast<ConstantSDNode *>(Index.getNode())->getZExtValue();
  unsigned Scale =
      ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
  assert(Scale > 1);
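  // Wasm is little-endian, so after bitcasting to the narrower lane type the
  // low bits of the original lane sit at index IndexVal * Scale.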
  SDValue NewIndex =
      DAG.getConstant(IndexVal * Scale, DL, Index.getValueType());
  SDValue NewExtract = DAG.getNode(
      ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(),
      DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract,
                     Op.getOperand(1));
}

SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const EVT VecT = Op.getValueType();
  const EVT LaneT = Op.getOperand(0).getValueType();
  const size_t Lanes = Op.getNumOperands();
  bool CanSwizzle = Subtarget->hasUnimplementedSIMD128() && VecT == MVT::v16i8;

  // BUILD_VECTORs are lowered to the instruction that initializes the highest
  // possible number of lanes at once followed by a sequence of replace_lane
  // instructions to individually initialize any remaining lanes.

  // TODO: Tune this. For example, lanewise swizzling is very expensive, so
  // swizzled lanes should be given greater weight.

  // TODO: Investigate building vectors by shuffling together vectors built by
  // separately specialized means.

  auto IsConstant = [](const SDValue &V) {
    return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
  };

  // Returns the source vector and index vector pair if they exist. Checks for:
  //   (extract_vector_elt
  //     $src,
  //     (sign_extend_inreg (extract_vector_elt $indices, $i))
  //   )
  auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
    auto Bail = std::make_pair(SDValue(), SDValue());
    if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return Bail;
    const SDValue &SwizzleSrc = Lane->getOperand(0);
    const SDValue &IndexExt = Lane->getOperand(1);
    if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
      return Bail;
    const SDValue &Index = IndexExt->getOperand(0);
    if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return Bail;
    const SDValue &SwizzleIndices = Index->getOperand(0);
    if (SwizzleSrc.getValueType() != MVT::v16i8 ||
        SwizzleIndices.getValueType() != MVT::v16i8 ||
        Index->getOperand(1)->getOpcode() != ISD::Constant ||
        Index->getConstantOperandVal(1) != I)
      return Bail;
    return std::make_pair(SwizzleSrc, SwizzleIndices);
  };

  using ValueEntry = std::pair<SDValue, size_t>;
  SmallVector<ValueEntry, 16> SplatValueCounts;

  using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
  SmallVector<SwizzleEntry, 16> SwizzleCounts;

  auto AddCount = [](auto &Counts, const auto &Val) {
    auto CountIt = std::find_if(Counts.begin(), Counts.end(),
                                [&Val](auto E) { return E.first == Val; });
    if (CountIt == Counts.end()) {
      Counts.emplace_back(Val, 1);
    } else {
      CountIt->second++;
    }
  };

  auto GetMostCommon = [](auto &Counts) {
    auto CommonIt =
        std::max_element(Counts.begin(), Counts.end(),
                         [](auto A, auto B) { return A.second < B.second; });
    assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
    return *CommonIt;
  };

  size_t NumConstantLanes = 0;

  // Count eligible lanes for each type of vector creation op
  for (size_t I = 0; I < Lanes; ++I) {
    const SDValue &Lane = Op->getOperand(I);
    if (Lane.isUndef())
      continue;

    AddCount(SplatValueCounts, Lane);

    if (IsConstant(Lane)) {
      NumConstantLanes++;
    } else if (CanSwizzle) {
      auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
      if (SwizzleSrcs.first)
        AddCount(SwizzleCounts, SwizzleSrcs);
    }
  }

  SDValue SplatValue;
  size_t NumSplatLanes;
  std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);

  SDValue SwizzleSrc;
  SDValue SwizzleIndices;
  size_t NumSwizzleLanes = 0;
  if (!SwizzleCounts.empty())
    std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
                          NumSwizzleLanes) = GetMostCommon(SwizzleCounts);

  // Predicate returning true if the lane is properly initialized by the
  // original instruction.
  std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
  SDValue Result;
  if (Subtarget->hasUnimplementedSIMD128()) {
    // Prefer swizzles over vector consts over splats
    if (NumSwizzleLanes >= NumSplatLanes &&
        NumSwizzleLanes >= NumConstantLanes) {
      Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
                           SwizzleIndices);
      auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
      IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
        return Swizzled == GetSwizzleSrcs(I, Lane);
      };
    } else if (NumConstantLanes >= NumSplatLanes) {
      SmallVector<SDValue, 16> ConstLanes;
      for (const SDValue &Lane : Op->op_values()) {
        if (IsConstant(Lane)) {
          ConstLanes.push_back(Lane);
        } else if (LaneT.isFloatingPoint()) {
          ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
        } else {
          ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
        }
      }
      Result = DAG.getBuildVector(VecT, DL, ConstLanes);
      IsLaneConstructed = [&](size_t _, const SDValue &Lane) {
        return IsConstant(Lane);
      };
    }
  }
  if (!Result) {
    // Use a splat, but possibly a load_splat
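    // (a scalar load whose memory type exactly matches the lane type can be
    // turned into a LOAD_SPLAT memory intrinsic instead of a load + splat).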
    LoadSDNode *SplattedLoad;
    if (Subtarget->hasUnimplementedSIMD128() &&
        (SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) &&
        SplattedLoad->getMemoryVT() == VecT.getVectorElementType()) {
      Result = DAG.getMemIntrinsicNode(
          WebAssemblyISD::LOAD_SPLAT, DL, DAG.getVTList(VecT),
          {SplattedLoad->getChain(), SplattedLoad->getBasePtr(),
           SplattedLoad->getOffset()},
          SplattedLoad->getMemoryVT(), SplattedLoad->getMemOperand());
    } else {
      Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
    }
    IsLaneConstructed = [&](size_t _, const SDValue &Lane) {
      return Lane == SplatValue;
    };
  }

  // Add replace_lane instructions for any unhandled values
  for (size_t I = 0; I < Lanes; ++I) {
    const SDValue &Lane = Op->getOperand(I);
    if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
      Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
                           DAG.getConstant(I, DL, MVT::i32));
  }

  return Result;
}

SDValue
WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
  MVT VecType = Op.getOperand(0).getSimpleValueType();
  assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
  size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;

  // Space for two vector args and sixteen mask indices
  SDValue Ops[18];
  size_t OpIdx = 0;
  Ops[OpIdx++] = Op.getOperand(0);
  Ops[OpIdx++] = Op.getOperand(1);

  // Expand mask indices to byte indices and materialize them as operands
  for (int M : Mask) {
    for (size_t J = 0; J < LaneBytes; ++J) {
      // Lower undefs (represented by -1 in mask) to zero
      uint64_t ByteIndex = M == -1 ? 0 : (uint64_t)M * LaneBytes + J;
      Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
    }
  }

  return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
}

SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // The legalizer does not know how to expand the comparison modes of i64x2
  // vectors because there are no i64x2 comparison instructions to select. We
  // could solve this by expanding all i64x2 SETCC nodes, but that seems to
  // expand f64x2 SETCC nodes (which return i64x2 results) as well. So instead
  // we manually unroll i64x2 comparisons here.
  assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
  SmallVector<SDValue, 2> LHS, RHS;
  DAG.ExtractVectorElements(Op->getOperand(0), LHS);
  DAG.ExtractVectorElements(Op->getOperand(1), RHS);
  const SDValue &CC = Op->getOperand(2);
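  // Each lane is compared with a scalar select_cc producing all-ones or
  // all-zeros, matching the all-true lane representation of SIMD comparisons.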
  auto MakeLane = [&](unsigned I) {
    return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I],
                       DAG.getConstant(uint64_t(-1), DL, MVT::i64),
                       DAG.getConstant(uint64_t(0), DL, MVT::i64), CC);
  };
  return DAG.getBuildVector(Op->getValueType(0), DL,
                            {MakeLane(0), MakeLane(1)});
}

SDValue
WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
                                                    SelectionDAG &DAG) const {
  // Allow constant lane indices, expand variable lane indices
  SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
  if (isa<ConstantSDNode>(IdxNode) || IdxNode->isUndef())
    return Op;
  // Perform default expansion
  return SDValue();
}

static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
  EVT LaneT = Op.getSimpleValueType().getVectorElementType();
  // 32-bit and 64-bit unrolled shifts will have proper semantics
  if (LaneT.bitsGE(MVT::i32))
    return DAG.UnrollVectorOp(Op.getNode());
  // Otherwise mask the shift value to get proper semantics from 32-bit shift
  SDLoc DL(Op);
  size_t NumLanes = Op.getSimpleValueType().getVectorNumElements();
  SDValue Mask = DAG.getConstant(LaneT.getSizeInBits() - 1, DL, MVT::i32);
  unsigned ShiftOpcode = Op.getOpcode();
  SmallVector<SDValue, 16> ShiftedElements;
  DAG.ExtractVectorElements(Op.getOperand(0), ShiftedElements, 0, 0, MVT::i32);
  SmallVector<SDValue, 16> ShiftElements;
  DAG.ExtractVectorElements(Op.getOperand(1), ShiftElements, 0, 0, MVT::i32);
  SmallVector<SDValue, 16> UnrolledOps;
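  // For each lane, mask the shift amount down to the lane width, sign-extend
  // the shifted value for arithmetic shifts, then do the shift at 32 bits.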
  for (size_t i = 0; i < NumLanes; ++i) {
    SDValue MaskedShiftValue =
        DAG.getNode(ISD::AND, DL, MVT::i32, ShiftElements[i], Mask);
    SDValue ShiftedValue = ShiftedElements[i];
    if (ShiftOpcode == ISD::SRA)
      ShiftedValue = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32,
                                 ShiftedValue, DAG.getValueType(LaneT));
    UnrolledOps.push_back(
        DAG.getNode(ShiftOpcode, DL, MVT::i32, ShiftedValue, MaskedShiftValue));
  }
  return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps);
}

SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);

  // Only manually lower vector shifts
  assert(Op.getSimpleValueType().isVector());

  // Unroll non-splat vector shifts
  BuildVectorSDNode *ShiftVec;
  SDValue SplatVal;
  if (!(ShiftVec = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode())) ||
      !(SplatVal = ShiftVec->getSplatValue()))
    return unrollVectorShift(Op, DAG);

  // All splats except i64x2 const splats are handled by patterns
  auto *SplatConst = dyn_cast<ConstantSDNode>(SplatVal);
  if (!SplatConst || Op.getSimpleValueType() != MVT::v2i64)
    return Op;

  // i64x2 const splats are custom lowered to avoid unnecessary wraps
  unsigned Opcode;
  switch (Op.getOpcode()) {
  case ISD::SHL:
    Opcode = WebAssemblyISD::VEC_SHL;
    break;
  case ISD::SRA:
    Opcode = WebAssemblyISD::VEC_SHR_S;
    break;
  case ISD::SRL:
    Opcode = WebAssemblyISD::VEC_SHR_U;
    break;
  default:
    llvm_unreachable("unexpected opcode");
  }
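  // The shift count operand is i32. Wasm shift instructions interpret the
  // count modulo the lane width, so truncating the 64-bit splat constant to
  // 32 bits cannot change the result.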
  APInt Shift = SplatConst->getAPIntValue().zextOrTrunc(32);
  return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0),
                     DAG.getConstant(Shift, DL, MVT::i32));
}

//===----------------------------------------------------------------------===//
//                          WebAssembly Optimization Hooks
//===----------------------------------------------------------------------===//
