//=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the WebAssemblyTargetLowering class.
///
//===----------------------------------------------------------------------===//

#include "WebAssemblyISelLowering.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyTargetMachine.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/WasmEHFuncInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

#define DEBUG_TYPE "wasm-lower"

WebAssemblyTargetLowering::WebAssemblyTargetLowering(
    const TargetMachine &TM, const WebAssemblySubtarget &STI)
    : TargetLowering(TM), Subtarget(&STI) {
  auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;

  // Booleans always contain 0 or 1.
  setBooleanContents(ZeroOrOneBooleanContent);
  // Except in SIMD vectors, whose lanes are all-zeros or all-ones.
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  // We don't know the microarchitecture here, so just reduce register pressure.
  setSchedulingPreference(Sched::RegPressure);
  // Tell ISel that we have a stack pointer.
  setStackPointerRegisterToSaveRestore(
      Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
  // Set up the register classes.
  addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
  addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
  addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
  addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
  if (Subtarget->hasSIMD128()) {
    addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
  }
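  // 64-bit-lane vectors are gated behind the unimplemented-simd128 feature
  // because engines had not yet implemented the corresponding instructions.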
  if (Subtarget->hasUnimplementedSIMD128()) {
    addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
  }
  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget->getRegisterInfo());

  setOperationAction(ISD::GlobalAddress, MVTPtr, Custom);
  setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom);
  setOperationAction(ISD::JumpTable, MVTPtr, Custom);
  setOperationAction(ISD::BlockAddress, MVTPtr, Custom);
  setOperationAction(ISD::BRIND, MVT::Other, Custom);

  // Take the default expansion for va_arg, va_copy, and va_end. There is no
  // default action for va_start, so we custom-lower it.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
    // Don't expand the floating-point types to constant pools.
    setOperationAction(ISD::ConstantFP, T, Legal);
    // Expand floating-point comparisons.
    for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
                    ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
      setCondCodeAction(CC, T, Expand);
    // Expand floating-point library function operators.
    for (auto Op :
         {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FMA})
      setOperationAction(Op, T, Expand);
    // Mark as legal the supported floating-point library function operators
    // that otherwise default to expand.
    for (auto Op :
         {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT})
      setOperationAction(Op, T, Legal);
    // Support minimum and maximum, which otherwise default to expand.
    setOperationAction(ISD::FMINIMUM, T, Legal);
    setOperationAction(ISD::FMAXIMUM, T, Legal);
    // WebAssembly currently has no builtin f16 support.
    setOperationAction(ISD::FP16_TO_FP, T, Expand);
    setOperationAction(ISD::FP_TO_FP16, T, Expand);
    setLoadExtAction(ISD::EXTLOAD, T, MVT::f16, Expand);
    setTruncStoreAction(T, MVT::f16, Expand);
  }

  // Expand unavailable integer operations.
  for (auto Op :
       {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU,
        ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS, ISD::SRA_PARTS,
        ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}) {
    for (auto T : {MVT::i32, MVT::i64})
      setOperationAction(Op, T, Expand);
    if (Subtarget->hasSIMD128())
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
        setOperationAction(Op, T, Expand);
    if (Subtarget->hasUnimplementedSIMD128())
      setOperationAction(Op, MVT::v2i64, Expand);
  }

  // SIMD-specific configuration
  if (Subtarget->hasSIMD128()) {
    // Support saturating add for i8x16 and i16x8
    for (auto Op : {ISD::SADDSAT, ISD::UADDSAT})
      for (auto T : {MVT::v16i8, MVT::v8i16})
        setOperationAction(Op, T, Legal);

    // Custom lower BUILD_VECTORs to minimize number of replace_lanes
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
      setOperationAction(ISD::BUILD_VECTOR, T, Custom);
    if (Subtarget->hasUnimplementedSIMD128())
      for (auto T : {MVT::v2i64, MVT::v2f64})
        setOperationAction(ISD::BUILD_VECTOR, T, Custom);

    // We have custom shuffle lowering to expose the shuffle mask
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
      setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);
    if (Subtarget->hasUnimplementedSIMD128())
      for (auto T : {MVT::v2i64, MVT::v2f64})
        setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);

    // Custom lowering since wasm shifts must have a scalar shift amount
    for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL}) {
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
        setOperationAction(Op, T, Custom);
      if (Subtarget->hasUnimplementedSIMD128())
        setOperationAction(Op, MVT::v2i64, Custom);
    }

    // Custom lower lane accesses to expand out variable indices
    for (auto Op : {ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT}) {
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
        setOperationAction(Op, T, Custom);
      if (Subtarget->hasUnimplementedSIMD128())
        for (auto T : {MVT::v2i64, MVT::v2f64})
          setOperationAction(Op, T, Custom);
    }

    // There is no i64x2.mul instruction
    setOperationAction(ISD::MUL, MVT::v2i64, Expand);

    // There are no vector select instructions
    for (auto Op : {ISD::VSELECT, ISD::SELECT_CC, ISD::SELECT}) {
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
        setOperationAction(Op, T, Expand);
      if (Subtarget->hasUnimplementedSIMD128())
        for (auto T : {MVT::v2i64, MVT::v2f64})
          setOperationAction(Op, T, Expand);
    }

    // Expand integer operations supported for scalars but not SIMD
    for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP, ISD::SDIV, ISD::UDIV,
                    ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR}) {
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
        setOperationAction(Op, T, Expand);
      if (Subtarget->hasUnimplementedSIMD128())
        setOperationAction(Op, MVT::v2i64, Expand);
    }

    // Expand float operations supported for scalars but not SIMD
    for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT,
                    ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
                    ISD::FEXP, ISD::FEXP2, ISD::FRINT}) {
      setOperationAction(Op, MVT::v4f32, Expand);
      if (Subtarget->hasUnimplementedSIMD128())
        setOperationAction(Op, MVT::v2f64, Expand);
    }

    // Custom lower comparisons for i64x2 vectors, which have no native
    // instructions
    if (Subtarget->hasUnimplementedSIMD128())
      for (unsigned CC = 0; CC < ISD::SETCC_INVALID; ++CC)
        setCondCodeAction(static_cast<ISD::CondCode>(CC), MVT::v2i64, Custom);

    // Expand additional SIMD ops that V8 hasn't implemented yet
    if (!Subtarget->hasUnimplementedSIMD128()) {
      setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
      setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
    }
  }

  // As a special case, these operators use the type to mean the type to
  // sign-extend from.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  if (!Subtarget->hasSignExt()) {
    // Sign extends are legal only when extending a vector extract
    auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
    for (auto T : {MVT::i8, MVT::i16, MVT::i32})
      setOperationAction(ISD::SIGN_EXTEND_INREG, T, Action);
  }
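  // The SIMD proposal has no sign-extend-in-register instruction, so always
  // expand it for vector types.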
  for (auto T : MVT::integer_fixedlen_vector_valuetypes())
    setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand);

  // Dynamic stack allocation: use the default expansion.
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand);

  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
  setOperationAction(ISD::CopyToReg, MVT::Other, Custom);

  // Expand these forms; we pattern-match the forms that we can handle in isel.
  for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
    for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
      setOperationAction(Op, T, Expand);

  // We have custom switch handling.
  setOperationAction(ISD::BR_JT, MVT::Other, Custom);

  // WebAssembly doesn't have:
  //  - Floating-point extending loads.
  //  - Floating-point truncating stores.
  //  - i1 extending loads.
  //  - truncating SIMD stores and most extending loads
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  for (auto T : MVT::integer_valuetypes())
    for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
      setLoadExtAction(Ext, T, MVT::i1, Promote);
  if (Subtarget->hasSIMD128()) {
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
                   MVT::v2f64}) {
      for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
        if (MVT(T) != MemT) {
          setTruncStoreAction(T, MemT, Expand);
          for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
            setLoadExtAction(Ext, T, MemT, Expand);
        }
      }
    }
    // But some vector extending loads are legal
    if (Subtarget->hasUnimplementedSIMD128()) {
      for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
        setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
        setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
        setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
      }
    }
  }

  // Don't do anything clever with build_pairs
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // Trap lowers to wasm unreachable
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // Exception handling intrinsics
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

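  // Wasm atomics are available up to 64 bits; AtomicExpandPass turns anything
  // wider into __atomic_* libcalls.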
  setMaxAtomicSizeInBitsSupported(64);

  // Override the __gnu_f2h_ieee/__gnu_h2f_ieee names so that the f32 name is
  // consistent with the f64 and f128 names.
  setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
  setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");

  // Define the emscripten name for return address helper.
  // TODO: when implementing other WASM backends, make this generic or only do
  // this on emscripten depending on what they end up doing.
  setLibcallName(RTLIB::RETURN_ADDRESS, "emscripten_return_address");

  // Always convert switches to br_tables unless there is only one case, which
  // is equivalent to a simple branch. This reduces code size for wasm, and we
  // defer possible jump table optimizations to the VM.
  setMinimumJumpTableEntries(2);
}

TargetLowering::AtomicExpansionKind
WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // We have wasm instructions for these
  switch (AI->getOperation()) {
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::And:
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
  case AtomicRMWInst::Xchg:
    return AtomicExpansionKind::None;
  default:
    break;
  }
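  // Everything else (e.g. nand, min, max) is expanded to a cmpxchg loop.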
  return AtomicExpansionKind::CmpXChg;
}

FastISel *WebAssemblyTargetLowering::createFastISel(
    FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
  return WebAssembly::createFastISel(FuncInfo, LibInfo);
}

MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
                                                      EVT VT) const {
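  // Round the shift-count width up to a power of two matching the shifted
  // type, e.g. shifts of an i48 value use an i64 count (i32 stays i32).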
  unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
  if (BitWidth > 1 && BitWidth < 8)
    BitWidth = 8;

  if (BitWidth > 64) {
    // The shift will be lowered to a libcall, and compiler-rt libcalls expect
    // the count to be an i32.
    BitWidth = 32;
    assert(BitWidth >= Log2_32_Ceil(VT.getSizeInBits()) &&
           "32-bit shift counts ought to be enough for anyone");
  }

  MVT Result = MVT::getIntegerVT(BitWidth);
  assert(Result != MVT::INVALID_SIMPLE_VALUE_TYPE &&
         "Unable to represent scalar shift amount type");
  return Result;
}

// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
// undefined result on invalid/overflow, to the WebAssembly opcode, which
// traps on invalid/overflow.
static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
                                       MachineBasicBlock *BB,
                                       const TargetInstrInfo &TII,
                                       bool IsUnsigned, bool Int64,
                                       bool Float64, unsigned LoweredOpcode) {
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();

  Register OutReg = MI.getOperand(0).getReg();
  Register InReg = MI.getOperand(1).getReg();

  unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
  unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
  unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
  unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
  unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
  unsigned Eqz = WebAssembly::EQZ_I32;
  unsigned And = WebAssembly::AND_I32;
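  // The conversion is in range when the input is strictly below 2^31/2^63 in
  // magnitude (signed) or below 2^32/2^64 (unsigned); -(double)INT{32,64}_MIN
  // computes those bounds exactly in floating point. Out-of-range and NaN
  // inputs take the branch that produces Substitute instead of trapping.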
  int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
  int64_t Substitute = IsUnsigned ? 0 : Limit;
  double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
  auto &Context = BB->getParent()->getFunction().getContext();
  Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);

  const BasicBlock *LLVMBB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
  MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB);
  MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);

  MachineFunction::iterator It = ++BB->getIterator();
  F->insert(It, FalseMBB);
  F->insert(It, TrueMBB);
  F->insert(It, DoneMBB);

  // Transfer the remainder of BB and its successor edges to DoneMBB.
  DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);

  BB->addSuccessor(TrueMBB);
  BB->addSuccessor(FalseMBB);
  TrueMBB->addSuccessor(DoneMBB);
  FalseMBB->addSuccessor(DoneMBB);

  unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
  Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
  Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
  CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
  TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));

  MI.eraseFromParent();
  // For signed numbers, we can do a single comparison to determine whether
  // fabs(x) is within range.
  if (IsUnsigned) {
    Tmp0 = InReg;
  } else {
    BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
  }
  BuildMI(BB, DL, TII.get(FConst), Tmp1)
      .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
  BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);

  // For unsigned numbers, we have to do a separate comparison with zero.
  if (IsUnsigned) {
    Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
    Register SecondCmpReg =
        MRI.createVirtualRegister(&WebAssembly::I32RegClass);
    Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
    BuildMI(BB, DL, TII.get(FConst), Tmp1)
        .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
    BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
    BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
    CmpReg = AndReg;
  }

  BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);

  // Create the CFG diamond to select between doing the conversion or using
  // the substitute value.
  BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
  BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
  BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
  BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
  BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
      .addReg(FalseReg)
      .addMBB(FalseMBB)
      .addReg(TrueReg)
      .addMBB(TrueMBB);

  return DoneMBB;
}

MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
    MachineInstr &MI, MachineBasicBlock *BB) const {
  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();

  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected instr type to insert");
  case WebAssembly::FP_TO_SINT_I32_F32:
    return LowerFPToInt(MI, DL, BB, TII, false, false, false,
                        WebAssembly::I32_TRUNC_S_F32);
  case WebAssembly::FP_TO_UINT_I32_F32:
    return LowerFPToInt(MI, DL, BB, TII, true, false, false,
                        WebAssembly::I32_TRUNC_U_F32);
  case WebAssembly::FP_TO_SINT_I64_F32:
    return LowerFPToInt(MI, DL, BB, TII, false, true, false,
                        WebAssembly::I64_TRUNC_S_F32);
  case WebAssembly::FP_TO_UINT_I64_F32:
    return LowerFPToInt(MI, DL, BB, TII, true, true, false,
                        WebAssembly::I64_TRUNC_U_F32);
  case WebAssembly::FP_TO_SINT_I32_F64:
    return LowerFPToInt(MI, DL, BB, TII, false, false, true,
                        WebAssembly::I32_TRUNC_S_F64);
  case WebAssembly::FP_TO_UINT_I32_F64:
    return LowerFPToInt(MI, DL, BB, TII, true, false, true,
                        WebAssembly::I32_TRUNC_U_F64);
  case WebAssembly::FP_TO_SINT_I64_F64:
    return LowerFPToInt(MI, DL, BB, TII, false, true, true,
                        WebAssembly::I64_TRUNC_S_F64);
  case WebAssembly::FP_TO_UINT_I64_F64:
    return LowerFPToInt(MI, DL, BB, TII, true, true, true,
                        WebAssembly::I64_TRUNC_U_F64);
  }
}

const char *
WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) {
  case WebAssemblyISD::FIRST_NUMBER:
  case WebAssemblyISD::FIRST_MEM_OPCODE:
    break;
#define HANDLE_NODETYPE(NODE)                                                  \
  case WebAssemblyISD::NODE:                                                   \
    return "WebAssemblyISD::" #NODE;
#define HANDLE_MEM_NODETYPE(NODE) HANDLE_NODETYPE(NODE)
#include "WebAssemblyISD.def"
#undef HANDLE_MEM_NODETYPE
#undef HANDLE_NODETYPE
  }
  return nullptr;
}

std::pair<unsigned, const TargetRegisterClass *>
WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // WebAssembly register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      assert(VT != MVT::iPTR && "Pointer MVT not expected here");
      if (Subtarget->hasSIMD128() && VT.isVector()) {
        if (VT.getSizeInBits() == 128)
          return std::make_pair(0U, &WebAssembly::V128RegClass);
      }
      if (VT.isInteger() && !VT.isVector()) {
        if (VT.getSizeInBits() <= 32)
          return std::make_pair(0U, &WebAssembly::I32RegClass);
        if (VT.getSizeInBits() <= 64)
          return std::make_pair(0U, &WebAssembly::I64RegClass);
      }
      break;
    default:
      break;
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

bool WebAssemblyTargetLowering::isCheapToSpeculateCttz() const {
  // Assume ctz is a relatively cheap operation.
  return true;
}

bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const {
  // Assume clz is a relatively cheap operation.
  return true;
}

bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                      const AddrMode &AM,
                                                      Type *Ty, unsigned AS,
                                                      Instruction *I) const {
  // WebAssembly offsets are added as unsigned without wrapping. The
  // isLegalAddressingMode interface gives us no way to determine whether
  // wrapping could occur, so we approximate by accepting only non-negative
  // offsets.
  if (AM.BaseOffs < 0)
    return false;

  // WebAssembly has no scale register operands.
  if (AM.Scale != 0)
    return false;
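  // For example, a (base + 16) access can fold into a wasm constant offset,
  // but (base - 4) and (base + 4 * index) cannot.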

  // Everything else is legal.
  return true;
}

bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
    EVT /*VT*/, unsigned /*AddrSpace*/, unsigned /*Align*/,
    MachineMemOperand::Flags /*Flags*/, bool *Fast) const {
  // WebAssembly supports unaligned accesses, though loads and stores that
  // perform them should carry the p2align attribute, and there may be a
  // performance impact. We tell LLVM they're "fast" because for the kinds of
  // things that LLVM uses this for (merging adjacent stores of constants,
  // etc.), WebAssembly implementations will either want the unaligned access
  // or they'll split anyway.
  if (Fast)
    *Fast = true;
  return true;
}

bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
                                              AttributeList Attr) const {
  // The current thinking is that wasm engines will perform this optimization,
  // so we can save on code size.
  return true;
}

bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
  if (!Subtarget->hasUnimplementedSIMD128())
    return false;
  MVT ExtT = ExtVal.getSimpleValueType();
  MVT MemT = cast<LoadSDNode>(ExtVal->getOperand(0))->getSimpleValueType(0);
  return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
         (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
         (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
}

EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
                                                  LLVMContext &C,
                                                  EVT VT) const {
  if (VT.isVector())
    return VT.changeVectorElementTypeToInteger();

  return TargetLowering::getSetCCResultType(DL, C, VT);
}

bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                                   const CallInst &I,
                                                   MachineFunction &MF,
                                                   unsigned Intrinsic) const {
  switch (Intrinsic) {
  case Intrinsic::wasm_atomic_notify:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    // The atomic.notify instruction does not really load from the memory
    // specified by this argument, but a MachineMemOperand must be either a
    // load or a store, so we mark this as a load.
    // FIXME Volatile isn't really correct, but currently all LLVM atomic
    // instructions are treated as volatiles in the backend, so we should be
    // consistent. The same applies for wasm_atomic_wait intrinsics too.
    Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
    return true;
  case Intrinsic::wasm_atomic_wait_i32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
    return true;
  case Intrinsic::wasm_atomic_wait_i64:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i64;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(8);
    Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
    return true;
  default:
    return false;
  }
}

//===----------------------------------------------------------------------===//
// WebAssembly Lowering private implementation.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//

static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
  MachineFunction &MF = DAG.getMachineFunction();
  DAG.getContext()->diagnose(
      DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
}

// Test whether the given calling convention is supported.
static bool callingConvSupported(CallingConv::ID CallConv) {
  // We currently support the language-independent, target-independent
  // conventions. We don't yet have a way to annotate calls with properties
  // like "cold", and we don't have any call-clobbered registers, so these are
  // mostly all handled the same.
  return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
         CallConv == CallingConv::Cold ||
         CallConv == CallingConv::PreserveMost ||
         CallConv == CallingConv::PreserveAll ||
         CallConv == CallingConv::CXX_FAST_TLS ||
         CallConv == CallingConv::WASM_EmscriptenInvoke;
}

SDValue
WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                     SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc DL = CLI.DL;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  MachineFunction &MF = DAG.getMachineFunction();
  auto Layout = MF.getDataLayout();

  CallingConv::ID CallConv = CLI.CallConv;
  if (!callingConvSupported(CallConv))
    fail(DL, DAG,
         "WebAssembly doesn't support language-specific or target-specific "
         "calling conventions yet");
  if (CLI.IsPatchPoint)
    fail(DL, DAG, "WebAssembly doesn't support patch point yet");

  if (CLI.IsTailCall) {
    bool MustTail = CLI.CS && CLI.CS.isMustTailCall();
    if (Subtarget->hasTailCall() && !CLI.IsVarArg) {
      // Do not tail call unless caller and callee return types match
      const Function &F = MF.getFunction();
      const TargetMachine &TM = getTargetMachine();
      Type *RetTy = F.getReturnType();
      SmallVector<MVT, 4> CallerRetTys;
      SmallVector<MVT, 4> CalleeRetTys;
      computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
      computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
      bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
                        std::equal(CallerRetTys.begin(), CallerRetTys.end(),
                                   CalleeRetTys.begin());
      if (!TypesMatch) {
        // musttail in this case would be an LLVM IR validation failure
        assert(!MustTail);
        CLI.IsTailCall = false;
      }
    } else {
      CLI.IsTailCall = false;
      if (MustTail) {
        if (CLI.IsVarArg) {
          // The return would pop the argument buffer
          fail(DL, DAG, "WebAssembly does not support varargs tail calls");
        } else {
          fail(DL, DAG, "WebAssembly 'tail-call' feature not enabled");
        }
      }
    }
  }

  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  if (Ins.size() > 1)
    fail(DL, DAG, "WebAssembly doesn't support more than 1 returned value yet");

  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;

  // The generic code may have added an sret argument. If we're lowering an
  // invoke function, the ABI requires that the function pointer be the first
  // argument, so we may have to swap the arguments.
  if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
      Outs[0].Flags.isSRet()) {
    std::swap(Outs[0], Outs[1]);
    std::swap(OutVals[0], OutVals[1]);
  }

  unsigned NumFixedArgs = 0;
  for (unsigned I = 0; I < Outs.size(); ++I) {
    const ISD::OutputArg &Out = Outs[I];
    SDValue &OutVal = OutVals[I];
    if (Out.Flags.isNest())
      fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
    if (Out.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
    if (Out.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
    if (Out.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
    if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
      auto &MFI = MF.getFrameInfo();
      int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
                                     Out.Flags.getByValAlign(),
                                     /*isSS=*/false);
      SDValue SizeNode =
          DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
      SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
      Chain = DAG.getMemcpy(
          Chain, DL, FINode, OutVal, SizeNode, Out.Flags.getByValAlign(),
          /*isVolatile*/ false, /*AlwaysInline=*/false,
          /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
      OutVal = FINode;
    }
    // Count the number of fixed args *after* legalization.
    NumFixedArgs += Out.IsFixed;
  }

  bool IsVarArg = CLI.IsVarArg;
  auto PtrVT = getPointerTy(Layout);

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (IsVarArg) {
    // Outgoing non-fixed arguments are placed in a buffer. First
    // compute their offsets and the total amount of buffer space needed.
    for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
      const ISD::OutputArg &Out = Outs[I];
      SDValue &Arg = OutVals[I];
      EVT VT = Arg.getValueType();
      assert(VT != MVT::iPTR && "Legalized args should be concrete");
      Type *Ty = VT.getTypeForEVT(*DAG.getContext());
      unsigned Align = std::max(Out.Flags.getOrigAlign(),
                                Layout.getABITypeAlignment(Ty));
      unsigned Offset = CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty),
                                             Align);
      CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
                                        Offset, VT.getSimpleVT(),
                                        CCValAssign::Full));
    }
  }

  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();

  SDValue FINode;
  if (IsVarArg && NumBytes) {
    // For non-fixed arguments, emit stores of the argument values into the
    // stack buffer at the offsets computed above.
    int FI = MF.getFrameInfo().CreateStackObject(NumBytes,
                                                 Layout.getStackAlignment(),
                                                 /*isSS=*/false);
    unsigned ValNo = 0;
    SmallVector<SDValue, 8> Chains;
    for (SDValue Arg :
         make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) {
      assert(ArgLocs[ValNo].getValNo() == ValNo &&
             "ArgLocs should remain in order and only hold varargs args");
      unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
      FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
      SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
                                DAG.getConstant(Offset, DL, PtrVT));
      Chains.push_back(
          DAG.getStore(Chain, DL, Arg, Add,
                       MachinePointerInfo::getFixedStack(MF, FI, Offset), 0));
    }
    if (!Chains.empty())
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  } else if (IsVarArg) {
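    // No variadic arguments were passed, but the callee still expects a
    // pointer to the vararg buffer, so pass a null pointer.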
    FINode = DAG.getIntPtrConstant(0, DL);
  }

  if (Callee->getOpcode() == ISD::GlobalAddress) {
    // If the callee is a GlobalAddress node (quite common, every direct call
    // is) turn it into a TargetGlobalAddress node so that LowerGlobalAddress
    // doesn't add MO_GOT, which is not needed for direct calls.
    GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Callee);
    Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
                                        getPointerTy(DAG.getDataLayout()),
                                        GA->getOffset());
    Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
                         getPointerTy(DAG.getDataLayout()), Callee);
  }

  // Compute the operands for the CALLn node.
  SmallVector<SDValue, 16> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
  // isn't reliable.
  Ops.append(OutVals.begin(),
             IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
  // Add a pointer to the vararg buffer.
  if (IsVarArg)
    Ops.push_back(FINode);

  SmallVector<EVT, 8> InTys;
  for (const auto &In : Ins) {
    assert(!In.Flags.isByVal() && "byval is not valid for return values");
    assert(!In.Flags.isNest() && "nest is not valid for return values");
    if (In.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
    if (In.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
    if (In.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG,
           "WebAssembly hasn't implemented cons regs last return values");
    // Ignore In.getOrigAlign() because all our arguments are passed in
    // registers.
    InTys.push_back(In.VT);
  }

  if (CLI.IsTailCall) {
    // ret_calls do not return values to the current frame
    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
    return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
  }

  InTys.push_back(MVT::Other);
  SDVTList InTyList = DAG.getVTList(InTys);
  SDValue Res =
      DAG.getNode(Ins.empty() ? WebAssemblyISD::CALL0 : WebAssemblyISD::CALL1,
                  DL, InTyList, Ops);
  if (Ins.empty()) {
    Chain = Res;
  } else {
    InVals.push_back(Res);
    Chain = Res.getValue(1);
  }

  return Chain;
}

bool WebAssemblyTargetLowering::CanLowerReturn(
    CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    LLVMContext & /*Context*/) const {
  // WebAssembly can only handle returning tuples with multivalue enabled
  return Subtarget->hasMultivalue() || Outs.size() <= 1;
}

SDValue WebAssemblyTargetLowering::LowerReturn(
    SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
    SelectionDAG &DAG) const {
  assert((Subtarget->hasMultivalue() || Outs.size() <= 1) &&
         "MVP WebAssembly can only return up to one value");
  if (!callingConvSupported(CallConv))
    fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");

  SmallVector<SDValue, 4> RetOps(1, Chain);
  RetOps.append(OutVals.begin(), OutVals.end());
  Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);

  // Record the number and types of the return values.
  for (const ISD::OutputArg &Out : Outs) {
    assert(!Out.Flags.isByVal() && "byval is not valid for return values");
    assert(!Out.Flags.isNest() && "nest is not valid for return values");
    assert(Out.IsFixed && "non-fixed return value is not valid");
    if (Out.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
    if (Out.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
    if (Out.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
  }

  return Chain;
}

SDValue WebAssemblyTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  if (!callingConvSupported(CallConv))
    fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");

  MachineFunction &MF = DAG.getMachineFunction();
  auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();

  // Set up the incoming ARGUMENTS value, which serves to represent the liveness
  // of the incoming values before they're represented by virtual registers.
  MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);

  for (const ISD::InputArg &In : Ins) {
    if (In.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
    if (In.Flags.isNest())
      fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
    if (In.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
    if (In.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
    // Ignore In.getOrigAlign() because all our arguments are passed in
    // registers.
    InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
                                           DAG.getTargetConstant(InVals.size(),
                                                                 DL, MVT::i32))
                             : DAG.getUNDEF(In.VT));

    // Record the number and types of arguments.
    MFI->addParam(In.VT);
  }

  // Varargs are copied into a buffer allocated by the caller, and a pointer to
  // the buffer is passed as an argument.
  if (IsVarArg) {
    MVT PtrVT = getPointerTy(MF.getDataLayout());
    Register VarargVreg =
        MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrVT));
    MFI->setVarargBufferVreg(VarargVreg);
    Chain = DAG.getCopyToReg(
        Chain, DL, VarargVreg,
        DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
                    DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
    MFI->addParam(PtrVT);
  }

  // Record the number and types of arguments and results.
  SmallVector<MVT, 4> Params;
  SmallVector<MVT, 4> Results;
  computeSignatureVTs(MF.getFunction().getFunctionType(), MF.getFunction(),
                      DAG.getTarget(), Params, Results);
  for (MVT VT : Results)
    MFI->addResult(VT);
  // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
  // the param logic here with computeSignatureVTs
  assert(MFI->getParams().size() == Params.size() &&
         std::equal(MFI->getParams().begin(), MFI->getParams().end(),
                    Params.begin()));

  return Chain;
}

void WebAssemblyTargetLowering::ReplaceNodeResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  case ISD::SIGN_EXTEND_INREG:
    // Do not add any results, signifying that N should not be custom lowered
    // after all. This happens because simd128 turns on custom lowering for
    // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
    // illegal type.
    break;
  default:
    llvm_unreachable(
        "ReplaceNodeResults not implemented for this op for WebAssembly!");
  }
}

//===----------------------------------------------------------------------===//
//  Custom lowering hooks.
//===----------------------------------------------------------------------===//

SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("unimplemented operation lowering");
    return SDValue();
  case ISD::FrameIndex:
    return LowerFrameIndex(Op, DAG);
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::ExternalSymbol:
    return LowerExternalSymbol(Op, DAG);
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG);
  case ISD::BR_JT:
    return LowerBR_JT(Op, DAG);
  case ISD::VASTART:
    return LowerVASTART(Op, DAG);
  case ISD::BlockAddress:
  case ISD::BRIND:
    fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
    return SDValue();
  case ISD::RETURNADDR:
    return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:
    return LowerFRAMEADDR(Op, DAG);
  case ISD::CopyToReg:
    return LowerCopyToReg(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
  case ISD::INSERT_VECTOR_ELT:
    return LowerAccessVectorElement(Op, DAG);
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_WO_CHAIN:
  case ISD::INTRINSIC_W_CHAIN:
    return LowerIntrinsic(Op, DAG);
  case ISD::SIGN_EXTEND_INREG:
    return LowerSIGN_EXTEND_INREG(Op, DAG);
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::SETCC:
    return LowerSETCC(Op, DAG);
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    return LowerShift(Op, DAG);
  }
}

SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDValue Src = Op.getOperand(2);
  if (isa<FrameIndexSDNode>(Src.getNode())) {
    // CopyToReg nodes don't support FrameIndex operands. Other targets select
    // the FI to some LEA-like instruction, but since we don't have that, we
    // need to insert some kind of instruction that can take an FI operand and
    // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
    // local.copy between Op and its FI operand.
    SDValue Chain = Op.getOperand(0);
    SDLoc DL(Op);
    unsigned Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
    EVT VT = Src.getValueType();
    SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
                                                   : WebAssembly::COPY_I64,
                                    DL, VT, Src),
                 0);
    return Op.getNode()->getNumValues() == 1
               ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
               : DAG.getCopyToReg(Chain, DL, Reg, Copy,
                                  Op.getNumOperands() == 4 ? Op.getOperand(3)
                                                           : SDValue());
  }
  return SDValue();
}

SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
                                                   SelectionDAG &DAG) const {
  int FI = cast<FrameIndexSDNode>(Op)->getIndex();
  return DAG.getTargetFrameIndex(FI, Op.getValueType());
}

SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);

  if (!Subtarget->getTargetTriple().isOSEmscripten()) {
    fail(DL, DAG,
         "Non-Emscripten WebAssembly hasn't implemented "
         "__builtin_return_address");
    return SDValue();
  }

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  MakeLibCallOptions CallOptions;
  return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
                     {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
      .first;
}

SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // Non-zero depths are not supported by WebAssembly currently. Use the
  // legalizer's default expansion, which is to return 0 (what this function is
  // documented to do).
  if (Op.getConstantOperandVal(0) > 0)
    return SDValue();

  DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
  EVT VT = Op.getValueType();
  Register FP =
      Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
  return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
}

SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const auto *GA = cast<GlobalAddressSDNode>(Op);
  EVT VT = Op.getValueType();
  assert(GA->getTargetFlags() == 0 &&
         "Unexpected target flags on generic GlobalAddressSDNode");
  if (GA->getAddressSpace() != 0)
    fail(DL, DAG, "WebAssembly only expects the 0 address space");

  unsigned OperandFlags = 0;
  if (isPositionIndependent()) {
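    // In PIC mode, DSO-local symbols are addressed relative to __memory_base
    // (for data) or __table_base (for functions); everything else goes
    // through a GOT entry via the MO_GOT operand flag.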
    const GlobalValue *GV = GA->getGlobal();
    if (getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) {
      MachineFunction &MF = DAG.getMachineFunction();
      MVT PtrVT = getPointerTy(MF.getDataLayout());
      const char *BaseName;
      if (GV->getValueType()->isFunctionTy()) {
        BaseName = MF.createExternalSymbolName("__table_base");
        OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL;
      } else {
        BaseName = MF.createExternalSymbolName("__memory_base");
        OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL;
      }
      SDValue BaseAddr =
          DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
                      DAG.getTargetExternalSymbol(BaseName, PtrVT));

      SDValue SymAddr = DAG.getNode(
          WebAssemblyISD::WrapperPIC, DL, VT,
          DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
                                     OperandFlags));

      return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
    } else {
      OperandFlags = WebAssemblyII::MO_GOT;
    }
  }

  return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
                     DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
                                                GA->getOffset(), OperandFlags));
}

SDValue
WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const auto *ES = cast<ExternalSymbolSDNode>(Op);
  EVT VT = Op.getValueType();
  assert(ES->getTargetFlags() == 0 &&
         "Unexpected target flags on generic ExternalSymbolSDNode");
  return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
                     DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
}

SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // There's no need for a Wrapper node because we always incorporate a jump
  // table operand into a BR_TABLE instruction, rather than ever
  // materializing it in a register.
  const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
                                JT->getTargetFlags());
}

SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Chain = Op.getOperand(0);
  const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
  SDValue Index = Op.getOperand(2);
  assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Index);

  MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
  const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;

  // Add an operand for each case.
  for (auto MBB : MBBs)
    Ops.push_back(DAG.getBasicBlock(MBB));

  // TODO: For now, we just pick something arbitrary for the default case. We
  // really want to sniff out the guard and put in the real default case (and
  // delete the guard).
  Ops.push_back(DAG.getBasicBlock(MBBs[0]));

  return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
}

SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());

  auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

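  // A wasm va_list is a single pointer: va_start just stores the address of
  // the vararg buffer, which was received as a hidden argument, into the
  // va_list object.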
1201   SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
1202                                     MFI->getVarargBufferVreg(), PtrVT);
1203   return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
1204                       MachinePointerInfo(SV), 0);
1205 }
1206 
1207 SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
1208                                                   SelectionDAG &DAG) const {
1209   MachineFunction &MF = DAG.getMachineFunction();
1210   unsigned IntNo;
1211   switch (Op.getOpcode()) {
1212   case ISD::INTRINSIC_VOID:
1213   case ISD::INTRINSIC_W_CHAIN:
1214     IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1215     break;
1216   case ISD::INTRINSIC_WO_CHAIN:
1217     IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1218     break;
1219   default:
1220     llvm_unreachable("Invalid intrinsic");
1221   }
1222   SDLoc DL(Op);
1223 
1224   switch (IntNo) {
1225   default:
1226     return SDValue(); // Don't custom lower most intrinsics.
1227 
1228   case Intrinsic::wasm_lsda: {
1229     EVT VT = Op.getValueType();
1230     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1231     MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
1232     auto &Context = MF.getMMI().getContext();
1233     MCSymbol *S = Context.getOrCreateSymbol(Twine("GCC_except_table") +
1234                                             Twine(MF.getFunctionNumber()));
1235     return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
1236                        DAG.getMCSymbol(S, PtrVT));
1237   }
1238 
  case Intrinsic::wasm_throw: {
    // We only support C++ exceptions for now.
    int Tag = cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
    if (Tag != CPP_EXCEPTION)
      llvm_unreachable("Invalid tag!");
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
    const char *SymName = MF.createExternalSymbolName("__cpp_exception");
    SDValue SymNode = DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
                                  DAG.getTargetExternalSymbol(SymName, PtrVT));
    return DAG.getNode(WebAssemblyISD::THROW, DL,
                       MVT::Other, // outchain type
                       {
                           Op.getOperand(0), // inchain
                           SymNode,          // exception symbol
                           Op.getOperand(3)  // thrown value
                       });
  }
  }
}

SDValue
WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // If sign extension operations are disabled, allow sext_inreg only if the
  // operand is a vector extract. SIMD does not depend on sign extension
  // operations, but allowing sext_inreg in this context lets us have simple
  // patterns to select extract_lane_s instructions. Expanding sext_inreg
  // everywhere would be simpler in this file, but would necessitate large and
  // brittle patterns to undo the expansion and select extract_lane_s
  // instructions.
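  // For example, (sext_inreg (extract_vector_elt v16i8:$v, $i), i8) can be
  // selected directly to i8x16.extract_lane_s this way.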
  assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
  if (Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    const SDValue &Extract = Op.getOperand(0);
    MVT VecT = Extract.getOperand(0).getSimpleValueType();
    MVT ExtractedLaneT = static_cast<VTSDNode *>(Op.getOperand(1).getNode())
                             ->getVT()
                             .getSimpleVT();
    MVT ExtractedVecT =
        MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
    if (ExtractedVecT == VecT)
      return Op;
    // Bitcast vector to appropriate type to ensure ISel pattern coverage.
    const SDValue &Index = Extract.getOperand(1);
    unsigned IndexVal =
        static_cast<ConstantSDNode *>(Index.getNode())->getZExtValue();
    unsigned Scale =
        ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
    assert(Scale > 1);
    SDValue NewIndex =
        DAG.getConstant(IndexVal * Scale, DL, Index.getValueType());
    SDValue NewExtract = DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(),
        DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(),
                       NewExtract, Op.getOperand(1));
  }
  // Otherwise expand.
  return SDValue();
}

SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const EVT VecT = Op.getValueType();
  const EVT LaneT = Op.getOperand(0).getValueType();
  const size_t Lanes = Op.getNumOperands();
  bool CanSwizzle = Subtarget->hasUnimplementedSIMD128() && VecT == MVT::v16i8;

  // BUILD_VECTORs are lowered to the instruction that initializes the highest
  // possible number of lanes at once followed by a sequence of replace_lane
  // instructions to individually initialize any remaining lanes.
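  // For example, a v4i32 with three identical lanes would typically become an
  // i32x4.splat followed by a single i32x4.replace_lane for the odd lane.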

  // TODO: Tune this. For example, lanewise swizzling is very expensive, so
  // swizzled lanes should be given greater weight.

  // TODO: Investigate building vectors by shuffling together vectors built by
  // separately specialized means.

  auto IsConstant = [](const SDValue &V) {
    return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
  };

  // Returns the source vector and index vector pair if they exist. Checks for:
  //   (extract_vector_elt
  //     $src,
  //     (sign_extend_inreg (extract_vector_elt $indices, $i))
  //   )
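  //
  // The sign_extend_inreg is expected here because i8 lane values are promoted
  // to i32 scalars when extracted; matching it means lane $i of this
  // BUILD_VECTOR lines up with lane $i of a prospective v8x16.swizzle.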
  auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
    auto Bail = std::make_pair(SDValue(), SDValue());
    if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return Bail;
    const SDValue &SwizzleSrc = Lane->getOperand(0);
    const SDValue &IndexExt = Lane->getOperand(1);
    if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
      return Bail;
    const SDValue &Index = IndexExt->getOperand(0);
    if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return Bail;
    const SDValue &SwizzleIndices = Index->getOperand(0);
    if (SwizzleSrc.getValueType() != MVT::v16i8 ||
        SwizzleIndices.getValueType() != MVT::v16i8 ||
        Index->getOperand(1)->getOpcode() != ISD::Constant ||
        Index->getConstantOperandVal(1) != I)
      return Bail;
    return std::make_pair(SwizzleSrc, SwizzleIndices);
  };

  using ValueEntry = std::pair<SDValue, size_t>;
  SmallVector<ValueEntry, 16> SplatValueCounts;

  using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
  SmallVector<SwizzleEntry, 16> SwizzleCounts;

  auto AddCount = [](auto &Counts, const auto &Val) {
    auto CountIt = std::find_if(Counts.begin(), Counts.end(),
                                [&Val](auto E) { return E.first == Val; });
    if (CountIt == Counts.end()) {
      Counts.emplace_back(Val, 1);
    } else {
      CountIt->second++;
    }
  };

  auto GetMostCommon = [](auto &Counts) {
    auto CommonIt =
        std::max_element(Counts.begin(), Counts.end(),
                         [](auto A, auto B) { return A.second < B.second; });
    assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
    return *CommonIt;
  };

  size_t NumConstantLanes = 0;

  // Count eligible lanes for each type of vector creation op.
  for (size_t I = 0; I < Lanes; ++I) {
    const SDValue &Lane = Op->getOperand(I);
    if (Lane.isUndef())
      continue;

    AddCount(SplatValueCounts, Lane);

    if (IsConstant(Lane)) {
      NumConstantLanes++;
    } else if (CanSwizzle) {
      auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
      if (SwizzleSrcs.first)
        AddCount(SwizzleCounts, SwizzleSrcs);
    }
  }

  SDValue SplatValue;
  size_t NumSplatLanes;
  std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);

  SDValue SwizzleSrc;
  SDValue SwizzleIndices;
  size_t NumSwizzleLanes = 0;
  if (SwizzleCounts.size())
    std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
                          NumSwizzleLanes) = GetMostCommon(SwizzleCounts);

  // Predicate returning true if the lane is properly initialized by the
  // original instruction.
  std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
  SDValue Result;
  if (Subtarget->hasUnimplementedSIMD128()) {
    // Prefer swizzles over vector consts over splats.
    if (NumSwizzleLanes >= NumSplatLanes &&
        NumSwizzleLanes >= NumConstantLanes) {
      Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
                           SwizzleIndices);
      auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
      IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
        return Swizzled == GetSwizzleSrcs(I, Lane);
      };
    } else if (NumConstantLanes >= NumSplatLanes) {
      SmallVector<SDValue, 16> ConstLanes;
      for (const SDValue &Lane : Op->op_values()) {
        if (IsConstant(Lane)) {
          ConstLanes.push_back(Lane);
        } else if (LaneT.isFloatingPoint()) {
          ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
        } else {
          ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
        }
      }
      Result = DAG.getBuildVector(VecT, DL, ConstLanes);
      IsLaneConstructed = [&](size_t _, const SDValue &Lane) {
        return IsConstant(Lane);
      };
    }
  }
  if (!Result) {
    // Use a splat, but possibly a load_splat.
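    // A load_splat instruction both loads the scalar and broadcasts it to all
    // lanes, so it is preferred when the splatted value is a load of exactly
    // one lane's width.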
    LoadSDNode *SplattedLoad;
    if (Subtarget->hasUnimplementedSIMD128() &&
        (SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) &&
        SplattedLoad->getMemoryVT() == VecT.getVectorElementType()) {
      Result = DAG.getMemIntrinsicNode(
          WebAssemblyISD::LOAD_SPLAT, DL, DAG.getVTList(VecT),
          {SplattedLoad->getChain(), SplattedLoad->getBasePtr(),
           SplattedLoad->getOffset()},
          SplattedLoad->getMemoryVT(), SplattedLoad->getMemOperand());
    } else {
      Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
    }
    IsLaneConstructed = [&](size_t _, const SDValue &Lane) {
      return Lane == SplatValue;
    };
  }

  // Add replace_lane instructions for any unhandled values.
  for (size_t I = 0; I < Lanes; ++I) {
    const SDValue &Lane = Op->getOperand(I);
    if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
      Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
                           DAG.getConstant(I, DL, MVT::i32));
  }

  return Result;
}

SDValue
WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
  MVT VecType = Op.getOperand(0).getSimpleValueType();
  assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
  size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;

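  // v8x16.shuffle selects individual bytes, so wider-lane masks are expanded
  // lanewise: e.g. mask element 1 of a v4i32 shuffle becomes byte indices
  // 4, 5, 6, and 7.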
  // Space for two vector args and sixteen mask indices.
  SDValue Ops[18];
  size_t OpIdx = 0;
  Ops[OpIdx++] = Op.getOperand(0);
  Ops[OpIdx++] = Op.getOperand(1);

  // Expand mask indices to byte indices and materialize them as operands.
  for (int M : Mask) {
    for (size_t J = 0; J < LaneBytes; ++J) {
      // Lower undefs (represented by -1 in mask) to zero.
      uint64_t ByteIndex = M == -1 ? 0 : (uint64_t)M * LaneBytes + J;
      Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
    }
  }

  return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
}

SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // The legalizer does not know how to expand i64x2 vector comparisons because
  // no i64x2 comparison instructions are supported. We could solve this by
  // expanding all i64x2 SETCC nodes, but that seems to expand f64x2 SETCC
  // nodes (which return i64x2 results) as well. So instead we manually unroll
  // i64x2 comparisons here.
  assert(Subtarget->hasUnimplementedSIMD128());
  assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
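  // Each lane becomes (select_cc lhs, rhs, -1, 0), matching the
  // all-ones-true/all-zeros-false convention for boolean vector contents.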
  SmallVector<SDValue, 2> LHS, RHS;
  DAG.ExtractVectorElements(Op->getOperand(0), LHS);
  DAG.ExtractVectorElements(Op->getOperand(1), RHS);
  const SDValue &CC = Op->getOperand(2);
  auto MakeLane = [&](unsigned I) {
    return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I],
                       DAG.getConstant(uint64_t(-1), DL, MVT::i64),
                       DAG.getConstant(uint64_t(0), DL, MVT::i64), CC);
  };
  return DAG.getBuildVector(Op->getValueType(0), DL,
                            {MakeLane(0), MakeLane(1)});
}

SDValue
WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
                                                    SelectionDAG &DAG) const {
  // Allow constant lane indices, expand variable lane indices.
  SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
  if (isa<ConstantSDNode>(IdxNode) || IdxNode->isUndef())
    return Op;
  else
    // Perform default expansion.
    return SDValue();
}

static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
  EVT LaneT = Op.getSimpleValueType().getVectorElementType();
  // 32-bit and 64-bit unrolled shifts will have proper semantics.
  if (LaneT.bitsGE(MVT::i32))
    return DAG.UnrollVectorOp(Op.getNode());
  // Otherwise mask the shift value to get proper semantics from 32-bit shift.
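  // For example, with i8 lanes the shift amount is masked with 7, mirroring
  // how wasm SIMD shifts interpret their scalar shift count modulo the lane
  // width.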
  SDLoc DL(Op);
  SDValue ShiftVal = Op.getOperand(1);
  uint64_t MaskVal = LaneT.getSizeInBits() - 1;
  SDValue MaskedShiftVal = DAG.getNode(
      ISD::AND,                    // mask opcode
      DL, ShiftVal.getValueType(), // masked value type
      ShiftVal,                    // original shift value operand
      DAG.getConstant(MaskVal, DL, ShiftVal.getValueType()) // mask operand
  );

  return DAG.UnrollVectorOp(
      DAG.getNode(Op.getOpcode(),        // original shift opcode
                  DL, Op.getValueType(), // original return type
                  Op.getOperand(0),      // original vector operand
                  MaskedShiftVal         // new masked shift value operand
                  )
          .getNode());
}

SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);

  // Only manually lower vector shifts.
  assert(Op.getSimpleValueType().isVector());

  // Unroll non-splat vector shifts.
  BuildVectorSDNode *ShiftVec;
  SDValue SplatVal;
  if (!(ShiftVec = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode())) ||
      !(SplatVal = ShiftVec->getSplatValue()))
    return unrollVectorShift(Op, DAG);

  // All splats except i64x2 const splats are handled by patterns.
  auto *SplatConst = dyn_cast<ConstantSDNode>(SplatVal);
  if (!SplatConst || Op.getSimpleValueType() != MVT::v2i64)
    return Op;

  // i64x2 const splats are custom lowered to avoid unnecessary wraps.
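  // Wasm SIMD shifts take an i32 shift count, so the i64 splat constant is
  // narrowed to an i32 operand here rather than leaving a wrap to be
  // legalized later.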
  unsigned Opcode;
  switch (Op.getOpcode()) {
  case ISD::SHL:
    Opcode = WebAssemblyISD::VEC_SHL;
    break;
  case ISD::SRA:
    Opcode = WebAssemblyISD::VEC_SHR_S;
    break;
  case ISD::SRL:
    Opcode = WebAssemblyISD::VEC_SHR_U;
    break;
  default:
    llvm_unreachable("unexpected opcode");
  }
  APInt Shift = SplatConst->getAPIntValue().zextOrTrunc(32);
  return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0),
                     DAG.getConstant(Shift, DL, MVT::i32));
}

//===----------------------------------------------------------------------===//
//                          WebAssembly Optimization Hooks
//===----------------------------------------------------------------------===//
