//=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the WebAssemblyTargetLowering class.
///
//===----------------------------------------------------------------------===//

#include "WebAssemblyISelLowering.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyTargetMachine.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/WasmEHFuncInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

#define DEBUG_TYPE "wasm-lower"

WebAssemblyTargetLowering::WebAssemblyTargetLowering(
    const TargetMachine &TM, const WebAssemblySubtarget &STI)
    : TargetLowering(TM), Subtarget(&STI) {
  auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;

  // Booleans always contain 0 or 1.
  setBooleanContents(ZeroOrOneBooleanContent);
  // Except in SIMD vectors
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
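  // (e.g. a scalar setcc produces 0 or 1, while a SIMD comparison such as
  // i32x4.eq produces 0 or -1 in each lane.)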
  // We don't know the microarchitecture here, so just reduce register pressure.
  setSchedulingPreference(Sched::RegPressure);
  // Tell ISel that we have a stack pointer.
  setStackPointerRegisterToSaveRestore(
      Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
  // Set up the register classes.
  addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
  addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
  addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
  addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
  if (Subtarget->hasSIMD128()) {
    addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
  }
  if (Subtarget->hasUnimplementedSIMD128()) {
    addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
  }
  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget->getRegisterInfo());

  setOperationAction(ISD::GlobalAddress, MVTPtr, Custom);
  setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom);
  setOperationAction(ISD::JumpTable, MVTPtr, Custom);
  setOperationAction(ISD::BlockAddress, MVTPtr, Custom);
  setOperationAction(ISD::BRIND, MVT::Other, Custom);
  // Take the default expansion for va_arg, va_copy, and va_end. There is no
  // default action for va_start, so we custom-lower it.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
    // Don't expand the floating-point types to constant pools.
    setOperationAction(ISD::ConstantFP, T, Legal);
    // Expand floating-point comparisons.
    for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
                    ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
      setCondCodeAction(CC, T, Expand);
    // Expand floating-point library function operators.
    for (auto Op :
         {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FMA})
      setOperationAction(Op, T, Expand);
    // Mark as Legal the supported floating-point library function operators
    // that otherwise default to expand.
    for (auto Op :
         {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT})
      setOperationAction(Op, T, Legal);
    // Support minimum and maximum, which otherwise default to expand.
    setOperationAction(ISD::FMINIMUM, T, Legal);
    setOperationAction(ISD::FMAXIMUM, T, Legal);
    // WebAssembly currently has no builtin f16 support.
    setOperationAction(ISD::FP16_TO_FP, T, Expand);
    setOperationAction(ISD::FP_TO_FP16, T, Expand);
    setLoadExtAction(ISD::EXTLOAD, T, MVT::f16, Expand);
    setTruncStoreAction(T, MVT::f16, Expand);
  }

  // Expand unavailable integer operations.
  for (auto Op :
       {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU,
        ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS, ISD::SRA_PARTS,
        ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}) {
    for (auto T : {MVT::i32, MVT::i64})
      setOperationAction(Op, T, Expand);
    if (Subtarget->hasSIMD128())
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
        setOperationAction(Op, T, Expand);
    if (Subtarget->hasUnimplementedSIMD128())
      setOperationAction(Op, MVT::v2i64, Expand);
  }

  // SIMD-specific configuration
  if (Subtarget->hasSIMD128()) {
    // Support saturating add for i8x16 and i16x8
    for (auto Op : {ISD::SADDSAT, ISD::UADDSAT})
      for (auto T : {MVT::v16i8, MVT::v8i16})
        setOperationAction(Op, T, Legal);

    // Custom lower BUILD_VECTORs to minimize number of replace_lanes
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
      setOperationAction(ISD::BUILD_VECTOR, T, Custom);
    if (Subtarget->hasUnimplementedSIMD128())
      for (auto T : {MVT::v2i64, MVT::v2f64})
        setOperationAction(ISD::BUILD_VECTOR, T, Custom);

    // We have custom shuffle lowering to expose the shuffle mask
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
      setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);
    if (Subtarget->hasUnimplementedSIMD128())
      for (auto T : {MVT::v2i64, MVT::v2f64})
        setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);

    // Custom lowering since wasm shifts must have a scalar shift amount
    for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL}) {
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
        setOperationAction(Op, T, Custom);
      if (Subtarget->hasUnimplementedSIMD128())
        setOperationAction(Op, MVT::v2i64, Custom);
    }

    // Custom lower lane accesses to expand out variable indices
    for (auto Op : {ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT}) {
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
        setOperationAction(Op, T, Custom);
      if (Subtarget->hasUnimplementedSIMD128())
        for (auto T : {MVT::v2i64, MVT::v2f64})
          setOperationAction(Op, T, Custom);
    }

    // There is no i64x2.mul instruction
    setOperationAction(ISD::MUL, MVT::v2i64, Expand);

    // There are no vector select instructions
    for (auto Op : {ISD::VSELECT, ISD::SELECT_CC, ISD::SELECT}) {
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
        setOperationAction(Op, T, Expand);
      if (Subtarget->hasUnimplementedSIMD128())
        for (auto T : {MVT::v2i64, MVT::v2f64})
          setOperationAction(Op, T, Expand);
    }

    // Expand integer operations supported for scalars but not SIMD
    for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP, ISD::SDIV, ISD::UDIV,
                    ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR}) {
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
        setOperationAction(Op, T, Expand);
      if (Subtarget->hasUnimplementedSIMD128())
        setOperationAction(Op, MVT::v2i64, Expand);
    }

    // Expand float operations supported for scalars but not SIMD
    for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT,
                    ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
                    ISD::FEXP, ISD::FEXP2, ISD::FRINT}) {
      setOperationAction(Op, MVT::v4f32, Expand);
      if (Subtarget->hasUnimplementedSIMD128())
        setOperationAction(Op, MVT::v2f64, Expand);
    }

    // Custom lower comparisons not directly supported for i64x2 vectors; see
    // LowerSETCC below
    if (Subtarget->hasUnimplementedSIMD128())
      for (unsigned CC = 0; CC < ISD::SETCC_INVALID; ++CC)
        setCondCodeAction(static_cast<ISD::CondCode>(CC), MVT::v2i64, Custom);

    // Expand additional SIMD ops that V8 hasn't implemented yet
    if (!Subtarget->hasUnimplementedSIMD128()) {
      setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
      setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
    }
  }

  // As a special case, these operators use the type to mean the type to
  // sign-extend from.
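  // (e.g. (sign_extend_inreg $x, i8) sign-extends the low 8 bits of $x,
  // regardless of $x's full width.)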
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  if (!Subtarget->hasSignExt()) {
    // Sign extends are legal only when extending a vector extract
    auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
    for (auto T : {MVT::i8, MVT::i16, MVT::i32})
      setOperationAction(ISD::SIGN_EXTEND_INREG, T, Action);
  }
  for (auto T : MVT::integer_fixedlen_vector_valuetypes())
    setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand);

  // Dynamic stack allocation: use the default expansion.
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand);

  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
  setOperationAction(ISD::CopyToReg, MVT::Other, Custom);

  // Expand these forms; we pattern-match the forms that we can handle in isel.
  for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
    for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
      setOperationAction(Op, T, Expand);

  // We have custom switch handling.
  setOperationAction(ISD::BR_JT, MVT::Other, Custom);

  // WebAssembly doesn't have:
  //  - Floating-point extending loads.
  //  - Floating-point truncating stores.
  //  - i1 extending loads.
  //  - truncating SIMD stores and most extending loads
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  for (auto T : MVT::integer_valuetypes())
    for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
      setLoadExtAction(Ext, T, MVT::i1, Promote);
  if (Subtarget->hasSIMD128()) {
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
                   MVT::v2f64}) {
      for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
        if (MVT(T) != MemT) {
          setTruncStoreAction(T, MemT, Expand);
          for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
            setLoadExtAction(Ext, T, MemT, Expand);
        }
      }
    }
    // But some vector extending loads are legal
    if (Subtarget->hasUnimplementedSIMD128()) {
      for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
        setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
        setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
        setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
      }
    }
  }

  // Don't do anything clever with build_pairs
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // Trap lowers to wasm unreachable
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // Exception handling intrinsics
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

  setMaxAtomicSizeInBitsSupported(64);

  // Override the __gnu_f2h_ieee/__gnu_h2f_ieee names so that the f32 name is
  // consistent with the f64 and f128 names.
  setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
  setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");

  // Define the emscripten name for return address helper.
  // TODO: when implementing other WASM backends, make this generic or only do
  // this on emscripten depending on what they end up doing.
  setLibcallName(RTLIB::RETURN_ADDRESS, "emscripten_return_address");

  // Always convert switches to br_tables unless there is only one case, which
  // is equivalent to a simple branch. This reduces code size for wasm, and we
  // defer possible jump table optimizations to the VM.
  setMinimumJumpTableEntries(2);
}

TargetLowering::AtomicExpansionKind
WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // We have wasm instructions for these
  switch (AI->getOperation()) {
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::And:
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
  case AtomicRMWInst::Xchg:
    return AtomicExpansionKind::None;
  default:
    break;
  }
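  // Everything else (e.g. nand, min, max) is expanded to a cmpxchg loop by
  // AtomicExpandPass.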
  return AtomicExpansionKind::CmpXChg;
}

FastISel *WebAssemblyTargetLowering::createFastISel(
    FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
  return WebAssembly::createFastISel(FuncInfo, LibInfo);
}

MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
                                                      EVT VT) const {
  unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
  if (BitWidth > 1 && BitWidth < 8)
    BitWidth = 8;

  if (BitWidth > 64) {
    // The shift will be lowered to a libcall, and compiler-rt libcalls expect
    // the count to be an i32.
    BitWidth = 32;
    assert(BitWidth >= Log2_32_Ceil(VT.getSizeInBits()) &&
           "32-bit shift counts ought to be enough for anyone");
  }
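  // For example, an i128 shift becomes a call to compiler-rt's
  // __ashlti3/__lshrti3/__ashrti3, which take the shift count as an i32.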

  MVT Result = MVT::getIntegerVT(BitWidth);
  assert(Result != MVT::INVALID_SIMPLE_VALUE_TYPE &&
         "Unable to represent scalar shift amount type");
  return Result;
}

// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
// undefined result on invalid/overflow, to the WebAssembly opcode, which
// traps on invalid/overflow.
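//
// A sketch of the emitted diamond for a signed f32 -> i32 conversion (the
// mnemonics here are illustrative; the code below emits the corresponding
// WebAssembly::* opcodes):
//
//   Tmp0 = f32.abs(x)
//   if (!(Tmp0 < 0x1p31))          ;; out of range, or NaN
//     result = INT32_MIN           ;; the substitute value
//   else
//     result = i32.trunc_f32_s(x)  ;; would trap on the values filtered above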
static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
                                       MachineBasicBlock *BB,
                                       const TargetInstrInfo &TII,
                                       bool IsUnsigned, bool Int64,
                                       bool Float64, unsigned LoweredOpcode) {
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();

  Register OutReg = MI.getOperand(0).getReg();
  Register InReg = MI.getOperand(1).getReg();

  unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
  unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
  unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
  unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
  unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
  unsigned Eqz = WebAssembly::EQZ_I32;
  unsigned And = WebAssembly::AND_I32;
  int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
  int64_t Substitute = IsUnsigned ? 0 : Limit;
  double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
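  // For signed i32, this gives CmpVal = 2^31 and Substitute = INT32_MIN; for
  // unsigned i32, CmpVal = 2^32 and Substitute = 0, with an additional
  // x >= 0 check emitted below.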
  auto &Context = BB->getParent()->getFunction().getContext();
  Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);

  const BasicBlock *LLVMBB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
  MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB);
  MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);

  MachineFunction::iterator It = ++BB->getIterator();
  F->insert(It, FalseMBB);
  F->insert(It, TrueMBB);
  F->insert(It, DoneMBB);

  // Transfer the remainder of BB and its successor edges to DoneMBB.
  DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);

  BB->addSuccessor(TrueMBB);
  BB->addSuccessor(FalseMBB);
  TrueMBB->addSuccessor(DoneMBB);
  FalseMBB->addSuccessor(DoneMBB);

  unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
  Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
  Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
  CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
  TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));

  MI.eraseFromParent();
  // For signed numbers, we can do a single comparison to determine whether
  // fabs(x) is within range.
  if (IsUnsigned) {
    Tmp0 = InReg;
  } else {
    BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
  }
  BuildMI(BB, DL, TII.get(FConst), Tmp1)
      .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
  BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);

  // For unsigned numbers, we have to do a separate comparison with zero.
  if (IsUnsigned) {
    Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
    Register SecondCmpReg =
        MRI.createVirtualRegister(&WebAssembly::I32RegClass);
    Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
    BuildMI(BB, DL, TII.get(FConst), Tmp1)
        .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
    BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
    BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
    CmpReg = AndReg;
  }

  BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);

  // Create the CFG diamond to select between doing the conversion and using
  // the substitute value.
  BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
  BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
  BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
  BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
  BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
      .addReg(FalseReg)
      .addMBB(FalseMBB)
      .addReg(TrueReg)
      .addMBB(TrueMBB);

  return DoneMBB;
}

MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
    MachineInstr &MI, MachineBasicBlock *BB) const {
  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();

  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected instr type to insert");
  case WebAssembly::FP_TO_SINT_I32_F32:
    return LowerFPToInt(MI, DL, BB, TII, false, false, false,
                        WebAssembly::I32_TRUNC_S_F32);
  case WebAssembly::FP_TO_UINT_I32_F32:
    return LowerFPToInt(MI, DL, BB, TII, true, false, false,
                        WebAssembly::I32_TRUNC_U_F32);
  case WebAssembly::FP_TO_SINT_I64_F32:
    return LowerFPToInt(MI, DL, BB, TII, false, true, false,
                        WebAssembly::I64_TRUNC_S_F32);
  case WebAssembly::FP_TO_UINT_I64_F32:
    return LowerFPToInt(MI, DL, BB, TII, true, true, false,
                        WebAssembly::I64_TRUNC_U_F32);
  case WebAssembly::FP_TO_SINT_I32_F64:
    return LowerFPToInt(MI, DL, BB, TII, false, false, true,
                        WebAssembly::I32_TRUNC_S_F64);
  case WebAssembly::FP_TO_UINT_I32_F64:
    return LowerFPToInt(MI, DL, BB, TII, true, false, true,
                        WebAssembly::I32_TRUNC_U_F64);
  case WebAssembly::FP_TO_SINT_I64_F64:
    return LowerFPToInt(MI, DL, BB, TII, false, true, true,
                        WebAssembly::I64_TRUNC_S_F64);
  case WebAssembly::FP_TO_UINT_I64_F64:
    return LowerFPToInt(MI, DL, BB, TII, true, true, true,
                        WebAssembly::I64_TRUNC_U_F64);
  }
}

const char *
WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) {
  case WebAssemblyISD::FIRST_NUMBER:
  case WebAssemblyISD::FIRST_MEM_OPCODE:
    break;
#define HANDLE_NODETYPE(NODE)                                                  \
  case WebAssemblyISD::NODE:                                                   \
    return "WebAssemblyISD::" #NODE;
#define HANDLE_MEM_NODETYPE(NODE) HANDLE_NODETYPE(NODE)
#include "WebAssemblyISD.def"
#undef HANDLE_MEM_NODETYPE
#undef HANDLE_NODETYPE
  }
  return nullptr;
}

std::pair<unsigned, const TargetRegisterClass *>
WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // WebAssembly register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      assert(VT != MVT::iPTR && "Pointer MVT not expected here");
      if (Subtarget->hasSIMD128() && VT.isVector()) {
        if (VT.getSizeInBits() == 128)
          return std::make_pair(0U, &WebAssembly::V128RegClass);
      }
      if (VT.isInteger() && !VT.isVector()) {
        if (VT.getSizeInBits() <= 32)
          return std::make_pair(0U, &WebAssembly::I32RegClass);
        if (VT.getSizeInBits() <= 64)
          return std::make_pair(0U, &WebAssembly::I64RegClass);
      }
      break;
    default:
      break;
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

bool WebAssemblyTargetLowering::isCheapToSpeculateCttz() const {
  // Assume ctz is a relatively cheap operation.
  return true;
}

bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const {
  // Assume clz is a relatively cheap operation.
  return true;
}

bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                      const AddrMode &AM,
                                                      Type *Ty, unsigned AS,
                                                      Instruction *I) const {
  // WebAssembly offsets are added as unsigned without wrapping. The
  // isLegalAddressingMode hook gives us no way to determine whether wrapping
  // could occur, so we approximate this by accepting only non-negative
  // offsets.
  if (AM.BaseOffs < 0)
    return false;

  // WebAssembly has no scale register operands.
  if (AM.Scale != 0)
    return false;
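  // For example, (%p + 16) can fold into a load/store's constant offset field,
  // but (%p - 16) and any scaled-index form must be computed with explicit
  // adds.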

  // Everything else is legal.
  return true;
}

bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
    EVT /*VT*/, unsigned /*AddrSpace*/, unsigned /*Align*/,
    MachineMemOperand::Flags /*Flags*/, bool *Fast) const {
  // WebAssembly supports unaligned accesses, though loads and stores that may
  // be unaligned should say so via the p2align attribute, and there may be a
  // performance impact. We tell LLVM they're "fast" because, for the kinds of
  // things LLVM uses this for (merging adjacent stores of constants, etc.),
  // WebAssembly implementations will either want the unaligned access or
  // they'll split anyway.
  if (Fast)
    *Fast = true;
  return true;
}

bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
                                              AttributeList Attr) const {
  // The current thinking is that wasm engines will perform this optimization,
  // so we can save on code size.
  return true;
}

bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
  if (!Subtarget->hasUnimplementedSIMD128())
    return false;
  MVT ExtT = ExtVal.getSimpleValueType();
  MVT MemT = cast<LoadSDNode>(ExtVal->getOperand(0))->getSimpleValueType(0);
  return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
         (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
         (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
}

EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
                                                  LLVMContext &C,
                                                  EVT VT) const {
  if (VT.isVector())
    return VT.changeVectorElementTypeToInteger();

  return TargetLowering::getSetCCResultType(DL, C, VT);
}

bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                                   const CallInst &I,
                                                   MachineFunction &MF,
                                                   unsigned Intrinsic) const {
  switch (Intrinsic) {
  case Intrinsic::wasm_atomic_notify:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    // The atomic.notify instruction does not actually load from the memory
    // specified by this argument, but a MachineMemOperand must be either a
    // load or a store, so we mark it as a load.
    // FIXME: Volatile isn't really correct, but currently all LLVM atomic
    // instructions are treated as volatile in the backend, so we should be
    // consistent. The same applies to the wasm_atomic_wait intrinsics.
    Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
    return true;
  case Intrinsic::wasm_atomic_wait_i32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
    return true;
  case Intrinsic::wasm_atomic_wait_i64:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i64;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(8);
    Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
    return true;
  default:
    return false;
  }
}

//===----------------------------------------------------------------------===//
// WebAssembly Lowering private implementation.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//

static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
  MachineFunction &MF = DAG.getMachineFunction();
  DAG.getContext()->diagnose(
      DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
}

// Test whether the given calling convention is supported.
static bool callingConvSupported(CallingConv::ID CallConv) {
  // We currently support the language-independent target-independent
  // conventions. We don't yet have a way to annotate calls with properties like
  // "cold", and we don't have any call-clobbered registers, so these are mostly
  // all handled the same.
  return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
         CallConv == CallingConv::Cold ||
         CallConv == CallingConv::PreserveMost ||
         CallConv == CallingConv::PreserveAll ||
         CallConv == CallingConv::CXX_FAST_TLS ||
         CallConv == CallingConv::WASM_EmscriptenInvoke;
}

SDValue
WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                     SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc DL = CLI.DL;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  MachineFunction &MF = DAG.getMachineFunction();
  auto Layout = MF.getDataLayout();

  CallingConv::ID CallConv = CLI.CallConv;
  if (!callingConvSupported(CallConv))
    fail(DL, DAG,
         "WebAssembly doesn't support language-specific or target-specific "
         "calling conventions yet");
  if (CLI.IsPatchPoint)
    fail(DL, DAG, "WebAssembly doesn't support patch point yet");

  if (CLI.IsTailCall) {
    bool MustTail = CLI.CS && CLI.CS.isMustTailCall();
    if (Subtarget->hasTailCall() && !CLI.IsVarArg) {
      // Do not tail call unless caller and callee return types match
      const Function &F = MF.getFunction();
      const TargetMachine &TM = getTargetMachine();
      Type *RetTy = F.getReturnType();
      SmallVector<MVT, 4> CallerRetTys;
      SmallVector<MVT, 4> CalleeRetTys;
      computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
      computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
      bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
                        std::equal(CallerRetTys.begin(), CallerRetTys.end(),
                                   CalleeRetTys.begin());
      if (!TypesMatch) {
        // musttail in this case would be an LLVM IR validation failure
        assert(!MustTail);
        CLI.IsTailCall = false;
      }
    } else {
      CLI.IsTailCall = false;
      if (MustTail) {
        if (CLI.IsVarArg) {
          // The return would pop the argument buffer
          fail(DL, DAG, "WebAssembly does not support varargs tail calls");
        } else {
          fail(DL, DAG, "WebAssembly 'tail-call' feature not enabled");
        }
      }
    }
  }

  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  if (Ins.size() > 1)
    fail(DL, DAG, "WebAssembly doesn't support more than 1 returned value yet");

  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;

  // The generic code may have added an sret argument. If we're lowering an
  // invoke function, the ABI requires that the function pointer be the first
  // argument, so we may have to swap the arguments.
  if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
      Outs[0].Flags.isSRet()) {
    std::swap(Outs[0], Outs[1]);
    std::swap(OutVals[0], OutVals[1]);
  }

  unsigned NumFixedArgs = 0;
  for (unsigned I = 0; I < Outs.size(); ++I) {
    const ISD::OutputArg &Out = Outs[I];
    SDValue &OutVal = OutVals[I];
    if (Out.Flags.isNest())
      fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
    if (Out.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
    if (Out.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
    if (Out.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
    if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
      auto &MFI = MF.getFrameInfo();
      int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
                                     Out.Flags.getByValAlign(),
                                     /*isSS=*/false);
      SDValue SizeNode =
          DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
      SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
      Chain = DAG.getMemcpy(
          Chain, DL, FINode, OutVal, SizeNode, Out.Flags.getByValAlign(),
          /*isVolatile*/ false, /*AlwaysInline=*/false,
          /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
      OutVal = FINode;
    }
    // Count the number of fixed args *after* legalization.
    NumFixedArgs += Out.IsFixed;
  }

  bool IsVarArg = CLI.IsVarArg;
  auto PtrVT = getPointerTy(Layout);

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (IsVarArg) {
    // Outgoing non-fixed arguments are placed in a buffer. First
    // compute their offsets and the total amount of buffer space needed.
    for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
      const ISD::OutputArg &Out = Outs[I];
      SDValue &Arg = OutVals[I];
      EVT VT = Arg.getValueType();
      assert(VT != MVT::iPTR && "Legalized args should be concrete");
      Type *Ty = VT.getTypeForEVT(*DAG.getContext());
      unsigned Align = std::max(Out.Flags.getOrigAlign(),
                                Layout.getABITypeAlignment(Ty));
      unsigned Offset = CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty),
                                             Align);
      CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
                                        Offset, VT.getSimpleVT(),
                                        CCValAssign::Full));
    }
  }
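  // For example, for printf("%d\n", x) only the format string is fixed; x is
  // assigned a buffer slot above and stored into that slot below.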

  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();

  SDValue FINode;
  if (IsVarArg && NumBytes) {
    // For non-fixed arguments, next emit stores to store the argument values
    // to the stack buffer at the offsets computed above.
    int FI = MF.getFrameInfo().CreateStackObject(NumBytes,
                                                 Layout.getStackAlignment(),
                                                 /*isSS=*/false);
    unsigned ValNo = 0;
    SmallVector<SDValue, 8> Chains;
    for (SDValue Arg :
         make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) {
      assert(ArgLocs[ValNo].getValNo() == ValNo &&
             "ArgLocs should remain in order and only hold varargs args");
      unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
      FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
      SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
                                DAG.getConstant(Offset, DL, PtrVT));
      Chains.push_back(
          DAG.getStore(Chain, DL, Arg, Add,
                       MachinePointerInfo::getFixedStack(MF, FI, Offset), 0));
    }
    if (!Chains.empty())
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  } else if (IsVarArg) {
    FINode = DAG.getIntPtrConstant(0, DL);
  }

  if (Callee->getOpcode() == ISD::GlobalAddress) {
    // If the callee is a GlobalAddress node (quite common, since every direct
    // call is), turn it into a TargetGlobalAddress node so that
    // LowerGlobalAddress doesn't add MO_GOT, which is not needed for direct
    // calls.
    GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Callee);
    Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
                                        getPointerTy(DAG.getDataLayout()),
                                        GA->getOffset());
    Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
                         getPointerTy(DAG.getDataLayout()), Callee);
  }

  // Compute the operands for the CALLn node.
  SmallVector<SDValue, 16> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
  // isn't reliable.
  Ops.append(OutVals.begin(),
             IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
  // Add a pointer to the vararg buffer.
  if (IsVarArg)
    Ops.push_back(FINode);

  SmallVector<EVT, 8> InTys;
  for (const auto &In : Ins) {
    assert(!In.Flags.isByVal() && "byval is not valid for return values");
    assert(!In.Flags.isNest() && "nest is not valid for return values");
    if (In.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
    if (In.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
    if (In.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG,
           "WebAssembly hasn't implemented cons regs last return values");
    // Ignore In.getOrigAlign() because all our arguments are passed in
    // registers.
    InTys.push_back(In.VT);
  }

  if (CLI.IsTailCall) {
    // ret_calls do not return values to the current frame
    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
    return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
  }

  InTys.push_back(MVT::Other);
  SDVTList InTyList = DAG.getVTList(InTys);
  SDValue Res =
      DAG.getNode(Ins.empty() ? WebAssemblyISD::CALL0 : WebAssemblyISD::CALL1,
                  DL, InTyList, Ops);
  if (Ins.empty()) {
    Chain = Res;
  } else {
    InVals.push_back(Res);
    Chain = Res.getValue(1);
  }

  return Chain;
}

bool WebAssemblyTargetLowering::CanLowerReturn(
    CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    LLVMContext & /*Context*/) const {
  // WebAssembly can only handle returning tuples with multivalue enabled
  return Subtarget->hasMultivalue() || Outs.size() <= 1;
}

SDValue WebAssemblyTargetLowering::LowerReturn(
    SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
    SelectionDAG &DAG) const {
  assert((Subtarget->hasMultivalue() || Outs.size() <= 1) &&
         "MVP WebAssembly can only return up to one value");
  if (!callingConvSupported(CallConv))
    fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");

  SmallVector<SDValue, 4> RetOps(1, Chain);
  RetOps.append(OutVals.begin(), OutVals.end());
  Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);

  // Record the number and types of the return values.
  for (const ISD::OutputArg &Out : Outs) {
    assert(!Out.Flags.isByVal() && "byval is not valid for return values");
    assert(!Out.Flags.isNest() && "nest is not valid for return values");
    assert(Out.IsFixed && "non-fixed return value is not valid");
    if (Out.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
    if (Out.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
    if (Out.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
  }

  return Chain;
}

SDValue WebAssemblyTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  if (!callingConvSupported(CallConv))
    fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");

  MachineFunction &MF = DAG.getMachineFunction();
  auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();

  // Set up the incoming ARGUMENTS value, which serves to represent the liveness
  // of the incoming values before they're represented by virtual registers.
  MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);

  for (const ISD::InputArg &In : Ins) {
    if (In.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
    if (In.Flags.isNest())
      fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
    if (In.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
    if (In.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
    // Ignore In.getOrigAlign() because all our arguments are passed in
    // registers.
    InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
                                           DAG.getTargetConstant(InVals.size(),
                                                                 DL, MVT::i32))
                             : DAG.getUNDEF(In.VT));

    // Record the number and types of arguments.
    MFI->addParam(In.VT);
  }

  // Varargs are copied into a buffer allocated by the caller, and a pointer to
  // the buffer is passed as an argument.
  if (IsVarArg) {
    MVT PtrVT = getPointerTy(MF.getDataLayout());
    Register VarargVreg =
        MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrVT));
    MFI->setVarargBufferVreg(VarargVreg);
    Chain = DAG.getCopyToReg(
        Chain, DL, VarargVreg,
        DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
                    DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
    MFI->addParam(PtrVT);
  }

  // Record the number and types of arguments and results.
  SmallVector<MVT, 4> Params;
  SmallVector<MVT, 4> Results;
  computeSignatureVTs(MF.getFunction().getFunctionType(), MF.getFunction(),
                      DAG.getTarget(), Params, Results);
  for (MVT VT : Results)
    MFI->addResult(VT);
  // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
  // the param logic here with computeSignatureVTs
  assert(MFI->getParams().size() == Params.size() &&
         std::equal(MFI->getParams().begin(), MFI->getParams().end(),
                    Params.begin()));

  return Chain;
}

void WebAssemblyTargetLowering::ReplaceNodeResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  case ISD::SIGN_EXTEND_INREG:
    // Do not add any results, signifying that N should not be custom lowered
    // after all. This happens because simd128 turns on custom lowering for
    // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
    // illegal type.
    break;
  default:
    llvm_unreachable(
        "ReplaceNodeResults not implemented for this op for WebAssembly!");
  }
}

//===----------------------------------------------------------------------===//
//  Custom lowering hooks.
//===----------------------------------------------------------------------===//

SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("unimplemented operation lowering");
    return SDValue();
  case ISD::FrameIndex:
    return LowerFrameIndex(Op, DAG);
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::ExternalSymbol:
    return LowerExternalSymbol(Op, DAG);
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG);
  case ISD::BR_JT:
    return LowerBR_JT(Op, DAG);
  case ISD::VASTART:
    return LowerVASTART(Op, DAG);
  case ISD::BlockAddress:
  case ISD::BRIND:
    fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
    return SDValue();
  case ISD::RETURNADDR:
    return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:
    return LowerFRAMEADDR(Op, DAG);
  case ISD::CopyToReg:
    return LowerCopyToReg(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
  case ISD::INSERT_VECTOR_ELT:
    return LowerAccessVectorElement(Op, DAG);
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_WO_CHAIN:
  case ISD::INTRINSIC_W_CHAIN:
    return LowerIntrinsic(Op, DAG);
  case ISD::SIGN_EXTEND_INREG:
    return LowerSIGN_EXTEND_INREG(Op, DAG);
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::SETCC:
    return LowerSETCC(Op, DAG);
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    return LowerShift(Op, DAG);
  }
}

SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDValue Src = Op.getOperand(2);
  if (isa<FrameIndexSDNode>(Src.getNode())) {
    // CopyToReg nodes don't support FrameIndex operands. Other targets select
    // the FI to some LEA-like instruction, but since we don't have that, we
    // need to insert some kind of instruction that can take an FI operand and
    // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
    // local.copy between Op and its FI operand.
    SDValue Chain = Op.getOperand(0);
    SDLoc DL(Op);
    unsigned Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
    EVT VT = Src.getValueType();
    SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
                                                   : WebAssembly::COPY_I64,
                                    DL, VT, Src),
                 0);
    return Op.getNode()->getNumValues() == 1
               ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
               : DAG.getCopyToReg(Chain, DL, Reg, Copy,
                                  Op.getNumOperands() == 4 ? Op.getOperand(3)
                                                           : SDValue());
  }
  return SDValue();
}

SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
                                                   SelectionDAG &DAG) const {
  int FI = cast<FrameIndexSDNode>(Op)->getIndex();
  return DAG.getTargetFrameIndex(FI, Op.getValueType());
}

SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);

  if (!Subtarget->getTargetTriple().isOSEmscripten()) {
    fail(DL, DAG,
         "Non-Emscripten WebAssembly hasn't implemented "
         "__builtin_return_address");
    return SDValue();
  }

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

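  // Lower to a call to emscripten_return_address(depth), the libcall name
  // registered for RTLIB::RETURN_ADDRESS in the constructor above.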
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  MakeLibCallOptions CallOptions;
  return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
                     {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
      .first;
}

SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // Non-zero depths are not supported by WebAssembly currently. Use the
  // legalizer's default expansion, which is to return 0 (what this function is
  // documented to do).
  if (Op.getConstantOperandVal(0) > 0)
    return SDValue();

  DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
  EVT VT = Op.getValueType();
  Register FP =
      Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
  return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
}

SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const auto *GA = cast<GlobalAddressSDNode>(Op);
  EVT VT = Op.getValueType();
  assert(GA->getTargetFlags() == 0 &&
         "Unexpected target flags on generic GlobalAddressSDNode");
  if (GA->getAddressSpace() != 0)
    fail(DL, DAG, "WebAssembly only expects the 0 address space");

  unsigned OperandFlags = 0;
  if (isPositionIndependent()) {
    const GlobalValue *GV = GA->getGlobal();
    if (getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) {
      MachineFunction &MF = DAG.getMachineFunction();
      MVT PtrVT = getPointerTy(MF.getDataLayout());
      const char *BaseName;
      if (GV->getValueType()->isFunctionTy()) {
        BaseName = MF.createExternalSymbolName("__table_base");
        OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL;
      } else {
        BaseName = MF.createExternalSymbolName("__memory_base");
        OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL;
      }
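      // The final address then computes as roughly (in wasm assembly, where
      // @MBREL/@TBREL denote the base-relative relocations added above):
      //   global.get __memory_base   ;; or __table_base for functions
      //   i32.const  sym@MBREL       ;; or sym@TBREL
      //   i32.add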
      SDValue BaseAddr =
          DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
                      DAG.getTargetExternalSymbol(BaseName, PtrVT));

      SDValue SymAddr = DAG.getNode(
          WebAssemblyISD::WrapperPIC, DL, VT,
          DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
                                     OperandFlags));

      return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
    } else {
      OperandFlags = WebAssemblyII::MO_GOT;
    }
  }

  return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
                     DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
                                                GA->getOffset(), OperandFlags));
}

SDValue
WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const auto *ES = cast<ExternalSymbolSDNode>(Op);
  EVT VT = Op.getValueType();
  assert(ES->getTargetFlags() == 0 &&
         "Unexpected target flags on generic ExternalSymbolSDNode");
  return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
                     DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
}

SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // There's no need for a Wrapper node because we always incorporate a jump
  // table operand into a BR_TABLE instruction, rather than ever
  // materializing it in a register.
  const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
                                JT->getTargetFlags());
}

SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Chain = Op.getOperand(0);
  const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
  SDValue Index = Op.getOperand(2);
  assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Index);

  MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
  const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;

  // Add an operand for each case.
  for (auto MBB : MBBs)
    Ops.push_back(DAG.getBasicBlock(MBB));

  // TODO: For now, we just pick something arbitrary for a default case. We
  // really want to sniff out the guard and put in the real default case (and
  // delete the guard).
  Ops.push_back(DAG.getBasicBlock(MBBs[0]));

  return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
}

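// va_start stores the address of the caller-allocated vararg buffer, which we
// receive as a hidden trailing argument (see LowerFormalArguments above), into
// the va_list.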
1194 SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
1195                                                 SelectionDAG &DAG) const {
1196   SDLoc DL(Op);
1197   EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());
1198 
1199   auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
1200   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1201 
1202   SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
1203                                     MFI->getVarargBufferVreg(), PtrVT);
1204   return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
1205                       MachinePointerInfo(SV), 0);
1206 }
1207 
1208 SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
1209                                                   SelectionDAG &DAG) const {
1210   MachineFunction &MF = DAG.getMachineFunction();
1211   unsigned IntNo;
1212   switch (Op.getOpcode()) {
1213   case ISD::INTRINSIC_VOID:
1214   case ISD::INTRINSIC_W_CHAIN:
1215     IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1216     break;
1217   case ISD::INTRINSIC_WO_CHAIN:
1218     IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1219     break;
1220   default:
1221     llvm_unreachable("Invalid intrinsic");
1222   }
1223   SDLoc DL(Op);
1224 
1225   switch (IntNo) {
1226   default:
1227     return SDValue(); // Don't custom lower most intrinsics.
1228 
1229   case Intrinsic::wasm_lsda: {
1230     EVT VT = Op.getValueType();
1231     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1232     MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
1233     auto &Context = MF.getMMI().getContext();
1234     MCSymbol *S = Context.getOrCreateSymbol(Twine("GCC_except_table") +
1235                                             Twine(MF.getFunctionNumber()));
1236     return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
1237                        DAG.getMCSymbol(S, PtrVT));
1238   }
1239 
1240   case Intrinsic::wasm_throw: {
1241     // We only support C++ exceptions for now
1242     int Tag = cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1243     if (Tag != CPP_EXCEPTION)
1244       llvm_unreachable("Invalid tag!");
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
    const char *SymName = MF.createExternalSymbolName("__cpp_exception");
    SDValue SymNode = DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
                                  DAG.getTargetExternalSymbol(SymName, PtrVT));
    return DAG.getNode(WebAssemblyISD::THROW, DL,
                       MVT::Other, // outchain type
                       {
                           Op.getOperand(0), // inchain
                           SymNode,          // exception symbol
                           Op.getOperand(3)  // thrown value
                       });
  }
  }
}

SDValue
WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // If sign extension operations are disabled, allow sext_inreg only if the
  // operand is a vector extract. SIMD does not depend on sign extension
  // operations, but allowing sext_inreg in this context lets us have simple
  // patterns to select extract_lane_s instructions. Expanding sext_inreg
  // everywhere would be simpler in this file, but would necessitate large and
  // brittle patterns to undo the expansion and select extract_lane_s
  // instructions.
  assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
  if (Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    const SDValue &Extract = Op.getOperand(0);
    MVT VecT = Extract.getOperand(0).getSimpleValueType();
    MVT ExtractedLaneT = static_cast<VTSDNode *>(Op.getOperand(1).getNode())
                             ->getVT()
                             .getSimpleVT();
    MVT ExtractedVecT =
        MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
    if (ExtractedVecT == VecT)
      return Op;
    // Bitcast vector to appropriate type to ensure ISel pattern coverage
    const SDValue &Index = Extract.getOperand(1);
    unsigned IndexVal =
        static_cast<ConstantSDNode *>(Index.getNode())->getZExtValue();
    unsigned Scale =
        ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
    assert(Scale > 1);
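    // The bitcast target has Scale times as many (narrower) lanes, so lane I
    // of the original vector begins at lane I * Scale of the bitcast vector
    // (lanes are ordered little-endian).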
    SDValue NewIndex =
        DAG.getConstant(IndexVal * Scale, DL, Index.getValueType());
    SDValue NewExtract = DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(),
        DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(),
                       NewExtract, Op.getOperand(1));
  }
  // Otherwise expand
  return SDValue();
}

SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const EVT VecT = Op.getValueType();
  const EVT LaneT = Op.getOperand(0).getValueType();
  const size_t Lanes = Op.getNumOperands();
  bool CanSwizzle = Subtarget->hasUnimplementedSIMD128() && VecT == MVT::v16i8;

  // BUILD_VECTORs are lowered to the instruction that initializes the highest
  // possible number of lanes at once followed by a sequence of replace_lane
  // instructions to individually initialize any remaining lanes.

  // TODO: Tune this. For example, lanewise swizzling is very expensive, so
  // swizzled lanes should be given greater weight.

  // TODO: Investigate building vectors by shuffling together vectors built by
  // separately specialized means.

  auto IsConstant = [](const SDValue &V) {
    return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
  };

  // Returns the source vector and index vector pair if they exist. Checks for:
  //   (extract_vector_elt
  //     $src,
  //     (sign_extend_inreg (extract_vector_elt $indices, $i))
  //   )
  auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
    auto Bail = std::make_pair(SDValue(), SDValue());
    if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return Bail;
    const SDValue &SwizzleSrc = Lane->getOperand(0);
    const SDValue &IndexExt = Lane->getOperand(1);
    if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
      return Bail;
    const SDValue &Index = IndexExt->getOperand(0);
    if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return Bail;
    const SDValue &SwizzleIndices = Index->getOperand(0);
    if (SwizzleSrc.getValueType() != MVT::v16i8 ||
        SwizzleIndices.getValueType() != MVT::v16i8 ||
        Index->getOperand(1)->getOpcode() != ISD::Constant ||
        Index->getConstantOperandVal(1) != I)
      return Bail;
    return std::make_pair(SwizzleSrc, SwizzleIndices);
  };

  using ValueEntry = std::pair<SDValue, size_t>;
  SmallVector<ValueEntry, 16> SplatValueCounts;

  using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
  SmallVector<SwizzleEntry, 16> SwizzleCounts;

  auto AddCount = [](auto &Counts, const auto &Val) {
    auto CountIt = std::find_if(Counts.begin(), Counts.end(),
                                [&Val](auto E) { return E.first == Val; });
    if (CountIt == Counts.end()) {
      Counts.emplace_back(Val, 1);
    } else {
      CountIt->second++;
    }
  };

  auto GetMostCommon = [](auto &Counts) {
    auto CommonIt =
        std::max_element(Counts.begin(), Counts.end(),
                         [](auto A, auto B) { return A.second < B.second; });
    assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
    return *CommonIt;
  };

  size_t NumConstantLanes = 0;

  // Count eligible lanes for each type of vector creation op
  for (size_t I = 0; I < Lanes; ++I) {
    const SDValue &Lane = Op->getOperand(I);
    if (Lane.isUndef())
      continue;

    AddCount(SplatValueCounts, Lane);

    if (IsConstant(Lane)) {
      NumConstantLanes++;
    } else if (CanSwizzle) {
      auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
      if (SwizzleSrcs.first)
        AddCount(SwizzleCounts, SwizzleSrcs);
    }
  }

  SDValue SplatValue;
  size_t NumSplatLanes;
  std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);

  SDValue SwizzleSrc;
  SDValue SwizzleIndices;
  size_t NumSwizzleLanes = 0;
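  // Unpack the most common ((source, indices), count) swizzle entry.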
  if (SwizzleCounts.size())
    std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
                          NumSwizzleLanes) = GetMostCommon(SwizzleCounts);

  // Predicate returning true if the lane is properly initialized by the
  // initial vector-creating instruction chosen below (and so needs no
  // replace_lane).
  std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
  SDValue Result;
  if (Subtarget->hasUnimplementedSIMD128()) {
    // Prefer swizzles over vector consts over splats
    if (NumSwizzleLanes >= NumSplatLanes &&
        NumSwizzleLanes >= NumConstantLanes) {
      Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
                           SwizzleIndices);
      auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
      IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
        return Swizzled == GetSwizzleSrcs(I, Lane);
      };
    } else if (NumConstantLanes >= NumSplatLanes) {
      SmallVector<SDValue, 16> ConstLanes;
      for (const SDValue &Lane : Op->op_values()) {
        if (IsConstant(Lane)) {
          ConstLanes.push_back(Lane);
        } else if (LaneT.isFloatingPoint()) {
          ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
        } else {
          ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
        }
      }
      Result = DAG.getBuildVector(VecT, DL, ConstLanes);
      IsLaneConstructed = [&](size_t _, const SDValue &Lane) {
        return IsConstant(Lane);
      };
    }
  }
  if (!Result) {
    // Use a splat, but possibly a load_splat
    LoadSDNode *SplattedLoad;
    if (Subtarget->hasUnimplementedSIMD128() &&
        (SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) &&
        SplattedLoad->getMemoryVT() == VecT.getVectorElementType()) {
      Result = DAG.getMemIntrinsicNode(
          WebAssemblyISD::LOAD_SPLAT, DL, DAG.getVTList(VecT),
          {SplattedLoad->getChain(), SplattedLoad->getBasePtr(),
           SplattedLoad->getOffset()},
          SplattedLoad->getMemoryVT(), SplattedLoad->getMemOperand());
    } else {
      Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
    }
    IsLaneConstructed = [&](size_t _, const SDValue &Lane) {
      return Lane == SplatValue;
    };
  }

  // Add replace_lane instructions for any unhandled values
  for (size_t I = 0; I < Lanes; ++I) {
    const SDValue &Lane = Op->getOperand(I);
    if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
      Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
                           DAG.getConstant(I, DL, MVT::i32));
  }

  return Result;
}

SDValue
WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
  MVT VecType = Op.getOperand(0).getSimpleValueType();
  assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
  size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;

  // Space for two vector args and sixteen mask indices
  SDValue Ops[18];
  size_t OpIdx = 0;
  Ops[OpIdx++] = Op.getOperand(0);
  Ops[OpIdx++] = Op.getOperand(1);

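  // The Wasm shuffle instruction selects bytes rather than lanes, so a mask
  // index for a lane of N bytes expands to N consecutive byte indices.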
  // Expand mask indices to byte indices and materialize them as operands
  for (int M : Mask) {
    for (size_t J = 0; J < LaneBytes; ++J) {
      // Lower undefs (represented by -1 in mask) to zero
      uint64_t ByteIndex = M == -1 ? 0 : (uint64_t)M * LaneBytes + J;
      Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
    }
  }

  return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
}

SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // The legalizer does not know how to expand the comparison modes of i64x2
  // vectors because no comparison modes are supported. We could solve this by
  // expanding all i64x2 SETCC nodes, but that seems to expand f64x2 SETCC
  // nodes (which return i64x2 results) as well. So instead we manually unroll
  // i64x2 comparisons here.
  assert(Subtarget->hasUnimplementedSIMD128());
  assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
  SmallVector<SDValue, 2> LHS, RHS;
  DAG.ExtractVectorElements(Op->getOperand(0), LHS);
  DAG.ExtractVectorElements(Op->getOperand(1), RHS);
  const SDValue &CC = Op->getOperand(2);
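  // Compare one pair of scalar lanes; SELECT_CC yields an all-ones or
  // all-zeros lane to match the vector boolean contents of a SETCC result.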
  auto MakeLane = [&](unsigned I) {
    return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I],
                       DAG.getConstant(uint64_t(-1), DL, MVT::i64),
                       DAG.getConstant(uint64_t(0), DL, MVT::i64), CC);
  };
  return DAG.getBuildVector(Op->getValueType(0), DL,
                            {MakeLane(0), MakeLane(1)});
}

SDValue
WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
                                                    SelectionDAG &DAG) const {
  // Allow constant lane indices, expand variable lane indices
  SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
  if (isa<ConstantSDNode>(IdxNode) || IdxNode->isUndef())
    return Op;
  // Perform default expansion
  return SDValue();
}

static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
  EVT LaneT = Op.getSimpleValueType().getVectorElementType();
  // 32-bit and 64-bit unrolled shifts will have proper semantics
  if (LaneT.bitsGE(MVT::i32))
    return DAG.UnrollVectorOp(Op.getNode());
  // Otherwise mask the shift value to get proper semantics from 32-bit shift
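  // (Wasm vector shifts take the shift amount modulo the lane width; once
  // narrow lanes are promoted to i32 for unrolling, that modulo has to be
  // applied explicitly to preserve those semantics.)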
  SDLoc DL(Op);
  SDValue ShiftVal = Op.getOperand(1);
  uint64_t MaskVal = LaneT.getSizeInBits() - 1;
  SDValue MaskedShiftVal = DAG.getNode(
      ISD::AND,                    // mask opcode
      DL, ShiftVal.getValueType(), // masked value type
      ShiftVal,                    // original shift value operand
      DAG.getConstant(MaskVal, DL, ShiftVal.getValueType()) // mask operand
  );

  return DAG.UnrollVectorOp(
      DAG.getNode(Op.getOpcode(),        // original shift opcode
                  DL, Op.getValueType(), // original return type
                  Op.getOperand(0),      // original vector operand
                  MaskedShiftVal         // new masked shift value operand
                  )
          .getNode());
}

SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);

  // Only manually lower vector shifts
  assert(Op.getSimpleValueType().isVector());

  // Unroll non-splat vector shifts
  BuildVectorSDNode *ShiftVec;
  SDValue SplatVal;
  if (!(ShiftVec = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode())) ||
      !(SplatVal = ShiftVec->getSplatValue()))
    return unrollVectorShift(Op, DAG);

  // All splats except i64x2 const splats are handled by patterns
  auto *SplatConst = dyn_cast<ConstantSDNode>(SplatVal);
  if (!SplatConst || Op.getSimpleValueType() != MVT::v2i64)
    return Op;

  // i64x2 const splats are custom lowered to avoid unnecessary wraps
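  // (The shift instructions take an i32 shift operand, so truncating the
  // splat constant to i32 here avoids emitting an i64-to-i32 wrap of the
  // shift amount.)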
  unsigned Opcode;
  switch (Op.getOpcode()) {
  case ISD::SHL:
    Opcode = WebAssemblyISD::VEC_SHL;
    break;
  case ISD::SRA:
    Opcode = WebAssemblyISD::VEC_SHR_S;
    break;
  case ISD::SRL:
    Opcode = WebAssemblyISD::VEC_SHR_U;
    break;
  default:
    llvm_unreachable("unexpected opcode");
  }
  APInt Shift = SplatConst->getAPIntValue().zextOrTrunc(32);
  return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0),
                     DAG.getConstant(Shift, DL, MVT::i32));
}

//===----------------------------------------------------------------------===//
//                          WebAssembly Optimization Hooks
//===----------------------------------------------------------------------===//
