1 //=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements the WebAssemblyTargetLowering class.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "WebAssemblyISelLowering.h"
15 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
16 #include "Utils/WebAssemblyUtilities.h"
17 #include "WebAssemblyMachineFunctionInfo.h"
18 #include "WebAssemblySubtarget.h"
19 #include "WebAssemblyTargetMachine.h"
20 #include "llvm/CodeGen/Analysis.h"
21 #include "llvm/CodeGen/CallingConvLower.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineJumpTableInfo.h"
24 #include "llvm/CodeGen/MachineModuleInfo.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/SelectionDAG.h"
27 #include "llvm/CodeGen/WasmEHFuncInfo.h"
28 #include "llvm/IR/DiagnosticInfo.h"
29 #include "llvm/IR/DiagnosticPrinter.h"
30 #include "llvm/IR/Function.h"
31 #include "llvm/IR/Intrinsics.h"
32 #include "llvm/IR/IntrinsicsWebAssembly.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/MathExtras.h"
36 #include "llvm/Support/raw_ostream.h"
37 #include "llvm/Target/TargetOptions.h"
38 using namespace llvm;
39 
40 #define DEBUG_TYPE "wasm-lower"
41 
42 WebAssemblyTargetLowering::WebAssemblyTargetLowering(
43     const TargetMachine &TM, const WebAssemblySubtarget &STI)
44     : TargetLowering(TM), Subtarget(&STI) {
45   auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;
46 
47   // Booleans always contain 0 or 1.
48   setBooleanContents(ZeroOrOneBooleanContent);
49   // Except in SIMD vectors
50   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
51   // We don't know the microarchitecture here, so just reduce register pressure.
52   setSchedulingPreference(Sched::RegPressure);
53   // Tell ISel that we have a stack pointer.
54   setStackPointerRegisterToSaveRestore(
55       Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
56   // Set up the register classes.
57   addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
58   addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
59   addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
60   addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
61   if (Subtarget->hasSIMD128()) {
62     addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
63     addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
64     addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
65     addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
66     addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
67     addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
68   }
69   // Compute derived properties from the register classes.
70   computeRegisterProperties(Subtarget->getRegisterInfo());
71 
72   setOperationAction(ISD::GlobalAddress, MVTPtr, Custom);
73   setOperationAction(ISD::GlobalTLSAddress, MVTPtr, Custom);
74   setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom);
75   setOperationAction(ISD::JumpTable, MVTPtr, Custom);
76   setOperationAction(ISD::BlockAddress, MVTPtr, Custom);
77   setOperationAction(ISD::BRIND, MVT::Other, Custom);
78 
79   // Take the default expansion for va_arg, va_copy, and va_end. There is no
80   // default action for va_start, so we do that custom.
81   setOperationAction(ISD::VASTART, MVT::Other, Custom);
82   setOperationAction(ISD::VAARG, MVT::Other, Expand);
83   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
84   setOperationAction(ISD::VAEND, MVT::Other, Expand);
85 
86   for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
87     // Don't expand the floating-point types to constant pools.
88     setOperationAction(ISD::ConstantFP, T, Legal);
89     // Expand floating-point comparisons.
90     for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
91                     ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
92       setCondCodeAction(CC, T, Expand);
93     // Expand floating-point library function operators.
94     for (auto Op :
95          {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FMA})
96       setOperationAction(Op, T, Expand);
97     // Note supported floating-point library function operators that otherwise
98     // default to expand.
99     for (auto Op :
100          {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT})
101       setOperationAction(Op, T, Legal);
102     // Support minimum and maximum, which otherwise default to expand.
103     setOperationAction(ISD::FMINIMUM, T, Legal);
104     setOperationAction(ISD::FMAXIMUM, T, Legal);
105     // WebAssembly currently has no builtin f16 support.
106     setOperationAction(ISD::FP16_TO_FP, T, Expand);
107     setOperationAction(ISD::FP_TO_FP16, T, Expand);
108     setLoadExtAction(ISD::EXTLOAD, T, MVT::f16, Expand);
109     setTruncStoreAction(T, MVT::f16, Expand);
110   }
111 
112   // Expand unavailable integer operations.
113   for (auto Op :
114        {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU,
115         ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS, ISD::SRA_PARTS,
116         ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}) {
117     for (auto T : {MVT::i32, MVT::i64})
118       setOperationAction(Op, T, Expand);
119     if (Subtarget->hasSIMD128())
120       for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
121         setOperationAction(Op, T, Expand);
122   }
123 
124   if (Subtarget->hasNontrappingFPToInt())
125     for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT})
126       for (auto T : {MVT::i32, MVT::i64})
127         setOperationAction(Op, T, Custom);
128 
129   // SIMD-specific configuration
130   if (Subtarget->hasSIMD128()) {
131     // Hoist bitcasts out of shuffles
132     setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
133 
134     // Combine extends of extract_subvectors into widening ops
135     setTargetDAGCombine(ISD::SIGN_EXTEND);
136     setTargetDAGCombine(ISD::ZERO_EXTEND);
137 
138     // Combine int_to_fp of extract_vectors and vice versa into conversions ops
139     setTargetDAGCombine(ISD::SINT_TO_FP);
140     setTargetDAGCombine(ISD::UINT_TO_FP);
141     setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
142 
143     // Combine concat of {s,u}int_to_fp_sat to i32x4.trunc_sat_f64x2_zero_{s,u}
144     setTargetDAGCombine(ISD::CONCAT_VECTORS);
145 
146     // Support saturating add for i8x16 and i16x8
147     for (auto Op : {ISD::SADDSAT, ISD::UADDSAT})
148       for (auto T : {MVT::v16i8, MVT::v8i16})
149         setOperationAction(Op, T, Legal);
150 
151     // Support integer abs
152     for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
153       setOperationAction(ISD::ABS, T, Legal);
154 
155     // Custom lower BUILD_VECTORs to minimize number of replace_lanes
156     for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
157                    MVT::v2f64})
158       setOperationAction(ISD::BUILD_VECTOR, T, Custom);
159 
160     // We have custom shuffle lowering to expose the shuffle mask
161     for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
162                    MVT::v2f64})
163       setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);
164 
165     // Custom lowering since wasm shifts must have a scalar shift amount
166     for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
167       for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
168         setOperationAction(Op, T, Custom);
169 
170     // Custom lower lane accesses to expand out variable indices
171     for (auto Op : {ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT})
172       for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
173                      MVT::v2f64})
174         setOperationAction(Op, T, Custom);
175 
176     // There is no i8x16.mul instruction
177     setOperationAction(ISD::MUL, MVT::v16i8, Expand);
178 
179     // There is no vector conditional select instruction
180     for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
181                    MVT::v2f64})
182       setOperationAction(ISD::SELECT_CC, T, Expand);
183 
184     // Expand integer operations supported for scalars but not SIMD
185     for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP, ISD::SDIV, ISD::UDIV,
186                     ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR})
187       for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
188         setOperationAction(Op, T, Expand);
189 
190     // But we do have integer min and max operations
191     for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
192       for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
193         setOperationAction(Op, T, Legal);
194 
195     // Expand float operations supported for scalars but not SIMD
196     for (auto Op : {ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
197                     ISD::FEXP, ISD::FEXP2, ISD::FRINT})
198       for (auto T : {MVT::v4f32, MVT::v2f64})
199         setOperationAction(Op, T, Expand);
200 
201     // Unsigned comparison operations are unavailable for i64x2 vectors.
202     for (auto CC : {ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE})
203       setCondCodeAction(CC, MVT::v2i64, Custom);
204 
205     // 64x2 conversions are not in the spec
206     for (auto Op :
207          {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT})
208       for (auto T : {MVT::v2i64, MVT::v2f64})
209         setOperationAction(Op, T, Expand);
210 
211     // But saturating fp_to_int converstions are
212     for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT})
213       setOperationAction(Op, MVT::v4i32, Custom);
214   }
215 
216   // As a special case, these operators use the type to mean the type to
217   // sign-extend from.
218   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
219   if (!Subtarget->hasSignExt()) {
220     // Sign extends are legal only when extending a vector extract
221     auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
222     for (auto T : {MVT::i8, MVT::i16, MVT::i32})
223       setOperationAction(ISD::SIGN_EXTEND_INREG, T, Action);
224   }
225   for (auto T : MVT::integer_fixedlen_vector_valuetypes())
226     setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand);
227 
228   // Dynamic stack allocation: use the default expansion.
229   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
230   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
231   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand);
232 
233   setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
234   setOperationAction(ISD::FrameIndex, MVT::i64, Custom);
235   setOperationAction(ISD::CopyToReg, MVT::Other, Custom);
236 
237   // Expand these forms; we pattern-match the forms that we can handle in isel.
238   for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
239     for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
240       setOperationAction(Op, T, Expand);
241 
242   // We have custom switch handling.
243   setOperationAction(ISD::BR_JT, MVT::Other, Custom);
244 
245   // WebAssembly doesn't have:
246   //  - Floating-point extending loads.
247   //  - Floating-point truncating stores.
248   //  - i1 extending loads.
249   //  - truncating SIMD stores and most extending loads
250   setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
251   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
252   for (auto T : MVT::integer_valuetypes())
253     for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
254       setLoadExtAction(Ext, T, MVT::i1, Promote);
255   if (Subtarget->hasSIMD128()) {
256     for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
257                    MVT::v2f64}) {
258       for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
259         if (MVT(T) != MemT) {
260           setTruncStoreAction(T, MemT, Expand);
261           for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
262             setLoadExtAction(Ext, T, MemT, Expand);
263         }
264       }
265     }
266     // But some vector extending loads are legal
267     for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
268       setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
269       setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
270       setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
271     }
272     // And some truncating stores are legal as well
273     setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
274     setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
275   }
276 
277   // Don't do anything clever with build_pairs
278   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
279 
280   // Trap lowers to wasm unreachable
281   setOperationAction(ISD::TRAP, MVT::Other, Legal);
282   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
283 
284   // Exception handling intrinsics
285   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
286   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
287   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
288 
289   setMaxAtomicSizeInBitsSupported(64);
290 
291   // Override the __gnu_f2h_ieee/__gnu_h2f_ieee names so that the f32 name is
292   // consistent with the f64 and f128 names.
293   setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
294   setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
295 
296   // Define the emscripten name for return address helper.
297   // TODO: when implementing other Wasm backends, make this generic or only do
298   // this on emscripten depending on what they end up doing.
299   setLibcallName(RTLIB::RETURN_ADDRESS, "emscripten_return_address");
300 
301   // Always convert switches to br_tables unless there is only one case, which
302   // is equivalent to a simple branch. This reduces code size for wasm, and we
303   // defer possible jump table optimizations to the VM.
304   setMinimumJumpTableEntries(2);
305 }
306 
307 TargetLowering::AtomicExpansionKind
308 WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
309   // We have wasm instructions for these
310   switch (AI->getOperation()) {
311   case AtomicRMWInst::Add:
312   case AtomicRMWInst::Sub:
313   case AtomicRMWInst::And:
314   case AtomicRMWInst::Or:
315   case AtomicRMWInst::Xor:
316   case AtomicRMWInst::Xchg:
317     return AtomicExpansionKind::None;
318   default:
319     break;
320   }
321   return AtomicExpansionKind::CmpXChg;
322 }
323 
324 FastISel *WebAssemblyTargetLowering::createFastISel(
325     FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
326   return WebAssembly::createFastISel(FuncInfo, LibInfo);
327 }
328 
329 MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
330                                                       EVT VT) const {
331   unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
332   if (BitWidth > 1 && BitWidth < 8)
333     BitWidth = 8;
334 
335   if (BitWidth > 64) {
336     // The shift will be lowered to a libcall, and compiler-rt libcalls expect
337     // the count to be an i32.
338     BitWidth = 32;
339     assert(BitWidth >= Log2_32_Ceil(VT.getSizeInBits()) &&
340            "32-bit shift counts ought to be enough for anyone");
341   }
342 
343   MVT Result = MVT::getIntegerVT(BitWidth);
344   assert(Result != MVT::INVALID_SIMPLE_VALUE_TYPE &&
345          "Unable to represent scalar shift amount type");
346   return Result;
347 }
348 
349 // Lower an fp-to-int conversion operator from the LLVM opcode, which has an
350 // undefined result on invalid/overflow, to the WebAssembly opcode, which
351 // traps on invalid/overflow.
352 static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
353                                        MachineBasicBlock *BB,
354                                        const TargetInstrInfo &TII,
355                                        bool IsUnsigned, bool Int64,
356                                        bool Float64, unsigned LoweredOpcode) {
357   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
358 
359   Register OutReg = MI.getOperand(0).getReg();
360   Register InReg = MI.getOperand(1).getReg();
361 
362   unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
363   unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
364   unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
365   unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
366   unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
367   unsigned Eqz = WebAssembly::EQZ_I32;
368   unsigned And = WebAssembly::AND_I32;
369   int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
370   int64_t Substitute = IsUnsigned ? 0 : Limit;
371   double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
372   auto &Context = BB->getParent()->getFunction().getContext();
373   Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);
374 
375   const BasicBlock *LLVMBB = BB->getBasicBlock();
376   MachineFunction *F = BB->getParent();
377   MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
378   MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB);
379   MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
380 
381   MachineFunction::iterator It = ++BB->getIterator();
382   F->insert(It, FalseMBB);
383   F->insert(It, TrueMBB);
384   F->insert(It, DoneMBB);
385 
386   // Transfer the remainder of BB and its successor edges to DoneMBB.
387   DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
388   DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
389 
390   BB->addSuccessor(TrueMBB);
391   BB->addSuccessor(FalseMBB);
392   TrueMBB->addSuccessor(DoneMBB);
393   FalseMBB->addSuccessor(DoneMBB);
394 
395   unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
396   Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
397   Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
398   CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
399   EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
400   FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
401   TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
402 
403   MI.eraseFromParent();
404   // For signed numbers, we can do a single comparison to determine whether
405   // fabs(x) is within range.
406   if (IsUnsigned) {
407     Tmp0 = InReg;
408   } else {
409     BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
410   }
411   BuildMI(BB, DL, TII.get(FConst), Tmp1)
412       .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
413   BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);
414 
415   // For unsigned numbers, we have to do a separate comparison with zero.
416   if (IsUnsigned) {
417     Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
418     Register SecondCmpReg =
419         MRI.createVirtualRegister(&WebAssembly::I32RegClass);
420     Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
421     BuildMI(BB, DL, TII.get(FConst), Tmp1)
422         .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
423     BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
424     BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
425     CmpReg = AndReg;
426   }
427 
428   BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);
429 
430   // Create the CFG diamond to select between doing the conversion or using
431   // the substitute value.
432   BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
433   BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
434   BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
435   BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
436   BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
437       .addReg(FalseReg)
438       .addMBB(FalseMBB)
439       .addReg(TrueReg)
440       .addMBB(TrueMBB);
441 
442   return DoneMBB;
443 }
444 
445 static MachineBasicBlock *
446 LowerCallResults(MachineInstr &CallResults, DebugLoc DL, MachineBasicBlock *BB,
447                  const WebAssemblySubtarget *Subtarget,
448                  const TargetInstrInfo &TII) {
449   MachineInstr &CallParams = *CallResults.getPrevNode();
450   assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS);
451   assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS ||
452          CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS);
453 
454   bool IsIndirect = CallParams.getOperand(0).isReg();
455   bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS;
456 
457   unsigned CallOp;
458   if (IsIndirect && IsRetCall) {
459     CallOp = WebAssembly::RET_CALL_INDIRECT;
460   } else if (IsIndirect) {
461     CallOp = WebAssembly::CALL_INDIRECT;
462   } else if (IsRetCall) {
463     CallOp = WebAssembly::RET_CALL;
464   } else {
465     CallOp = WebAssembly::CALL;
466   }
467 
468   MachineFunction &MF = *BB->getParent();
469   const MCInstrDesc &MCID = TII.get(CallOp);
470   MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));
471 
472   // See if we must truncate the function pointer.
473   // CALL_INDIRECT takes an i32, but in wasm64 we represent function pointers
474   // as 64-bit for uniformity with other pointer types.
475   // See also: WebAssemblyFastISel::selectCall
476   if (IsIndirect && MF.getSubtarget<WebAssemblySubtarget>().hasAddr64()) {
477     Register Reg32 =
478         MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
479     auto &FnPtr = CallParams.getOperand(0);
480     BuildMI(*BB, CallResults.getIterator(), DL,
481             TII.get(WebAssembly::I32_WRAP_I64), Reg32)
482         .addReg(FnPtr.getReg());
483     FnPtr.setReg(Reg32);
484   }
485 
486   // Move the function pointer to the end of the arguments for indirect calls
487   if (IsIndirect) {
488     auto FnPtr = CallParams.getOperand(0);
489     CallParams.RemoveOperand(0);
490     CallParams.addOperand(FnPtr);
491   }
492 
493   for (auto Def : CallResults.defs())
494     MIB.add(Def);
495 
496   if (IsIndirect) {
497     // Placeholder for the type index.
498     MIB.addImm(0);
499     // The table into which this call_indirect indexes.
500     MCSymbolWasm *Table =
501         WebAssembly::getOrCreateFunctionTableSymbol(MF.getContext(), Subtarget);
502     if (Subtarget->hasReferenceTypes()) {
503       MIB.addSym(Table);
504     } else {
505       // For the MVP there is at most one table whose number is 0, but we can't
506       // write a table symbol or issue relocations.  Instead we just ensure the
507       // table is live and write a zero.
508       Table->setNoStrip();
509       MIB.addImm(0);
510     }
511   }
512 
513   for (auto Use : CallParams.uses())
514     MIB.add(Use);
515 
516   BB->insert(CallResults.getIterator(), MIB);
517   CallParams.eraseFromParent();
518   CallResults.eraseFromParent();
519 
520   return BB;
521 }
522 
523 MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
524     MachineInstr &MI, MachineBasicBlock *BB) const {
525   const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
526   DebugLoc DL = MI.getDebugLoc();
527 
528   switch (MI.getOpcode()) {
529   default:
530     llvm_unreachable("Unexpected instr type to insert");
531   case WebAssembly::FP_TO_SINT_I32_F32:
532     return LowerFPToInt(MI, DL, BB, TII, false, false, false,
533                         WebAssembly::I32_TRUNC_S_F32);
534   case WebAssembly::FP_TO_UINT_I32_F32:
535     return LowerFPToInt(MI, DL, BB, TII, true, false, false,
536                         WebAssembly::I32_TRUNC_U_F32);
537   case WebAssembly::FP_TO_SINT_I64_F32:
538     return LowerFPToInt(MI, DL, BB, TII, false, true, false,
539                         WebAssembly::I64_TRUNC_S_F32);
540   case WebAssembly::FP_TO_UINT_I64_F32:
541     return LowerFPToInt(MI, DL, BB, TII, true, true, false,
542                         WebAssembly::I64_TRUNC_U_F32);
543   case WebAssembly::FP_TO_SINT_I32_F64:
544     return LowerFPToInt(MI, DL, BB, TII, false, false, true,
545                         WebAssembly::I32_TRUNC_S_F64);
546   case WebAssembly::FP_TO_UINT_I32_F64:
547     return LowerFPToInt(MI, DL, BB, TII, true, false, true,
548                         WebAssembly::I32_TRUNC_U_F64);
549   case WebAssembly::FP_TO_SINT_I64_F64:
550     return LowerFPToInt(MI, DL, BB, TII, false, true, true,
551                         WebAssembly::I64_TRUNC_S_F64);
552   case WebAssembly::FP_TO_UINT_I64_F64:
553     return LowerFPToInt(MI, DL, BB, TII, true, true, true,
554                         WebAssembly::I64_TRUNC_U_F64);
555   case WebAssembly::CALL_RESULTS:
556   case WebAssembly::RET_CALL_RESULTS:
557     return LowerCallResults(MI, DL, BB, Subtarget, TII);
558   }
559 }
560 
561 const char *
562 WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const {
563   switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) {
564   case WebAssemblyISD::FIRST_NUMBER:
565   case WebAssemblyISD::FIRST_MEM_OPCODE:
566     break;
567 #define HANDLE_NODETYPE(NODE)                                                  \
568   case WebAssemblyISD::NODE:                                                   \
569     return "WebAssemblyISD::" #NODE;
570 #define HANDLE_MEM_NODETYPE(NODE) HANDLE_NODETYPE(NODE)
571 #include "WebAssemblyISD.def"
572 #undef HANDLE_MEM_NODETYPE
573 #undef HANDLE_NODETYPE
574   }
575   return nullptr;
576 }
577 
578 std::pair<unsigned, const TargetRegisterClass *>
579 WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
580     const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
581   // First, see if this is a constraint that directly corresponds to a
582   // WebAssembly register class.
583   if (Constraint.size() == 1) {
584     switch (Constraint[0]) {
585     case 'r':
586       assert(VT != MVT::iPTR && "Pointer MVT not expected here");
587       if (Subtarget->hasSIMD128() && VT.isVector()) {
588         if (VT.getSizeInBits() == 128)
589           return std::make_pair(0U, &WebAssembly::V128RegClass);
590       }
591       if (VT.isInteger() && !VT.isVector()) {
592         if (VT.getSizeInBits() <= 32)
593           return std::make_pair(0U, &WebAssembly::I32RegClass);
594         if (VT.getSizeInBits() <= 64)
595           return std::make_pair(0U, &WebAssembly::I64RegClass);
596       }
597       if (VT.isFloatingPoint() && !VT.isVector()) {
598         switch (VT.getSizeInBits()) {
599         case 32:
600           return std::make_pair(0U, &WebAssembly::F32RegClass);
601         case 64:
602           return std::make_pair(0U, &WebAssembly::F64RegClass);
603         default:
604           break;
605         }
606       }
607       break;
608     default:
609       break;
610     }
611   }
612 
613   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
614 }
615 
616 bool WebAssemblyTargetLowering::isCheapToSpeculateCttz() const {
617   // Assume ctz is a relatively cheap operation.
618   return true;
619 }
620 
621 bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const {
622   // Assume clz is a relatively cheap operation.
623   return true;
624 }
625 
626 bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
627                                                       const AddrMode &AM,
628                                                       Type *Ty, unsigned AS,
629                                                       Instruction *I) const {
630   // WebAssembly offsets are added as unsigned without wrapping. The
631   // isLegalAddressingMode gives us no way to determine if wrapping could be
632   // happening, so we approximate this by accepting only non-negative offsets.
633   if (AM.BaseOffs < 0)
634     return false;
635 
636   // WebAssembly has no scale register operands.
637   if (AM.Scale != 0)
638     return false;
639 
640   // Everything else is legal.
641   return true;
642 }
643 
644 bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
645     EVT /*VT*/, unsigned /*AddrSpace*/, Align /*Align*/,
646     MachineMemOperand::Flags /*Flags*/, bool *Fast) const {
647   // WebAssembly supports unaligned accesses, though it should be declared
648   // with the p2align attribute on loads and stores which do so, and there
649   // may be a performance impact. We tell LLVM they're "fast" because
650   // for the kinds of things that LLVM uses this for (merging adjacent stores
651   // of constants, etc.), WebAssembly implementations will either want the
652   // unaligned access or they'll split anyway.
653   if (Fast)
654     *Fast = true;
655   return true;
656 }
657 
658 bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
659                                               AttributeList Attr) const {
660   // The current thinking is that wasm engines will perform this optimization,
661   // so we can save on code size.
662   return true;
663 }
664 
665 bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
666   EVT ExtT = ExtVal.getValueType();
667   EVT MemT = cast<LoadSDNode>(ExtVal->getOperand(0))->getValueType(0);
668   return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
669          (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
670          (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
671 }
672 
673 EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
674                                                   LLVMContext &C,
675                                                   EVT VT) const {
676   if (VT.isVector())
677     return VT.changeVectorElementTypeToInteger();
678 
679   // So far, all branch instructions in Wasm take an I32 condition.
680   // The default TargetLowering::getSetCCResultType returns the pointer size,
681   // which would be useful to reduce instruction counts when testing
682   // against 64-bit pointers/values if at some point Wasm supports that.
683   return EVT::getIntegerVT(C, 32);
684 }
685 
686 bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
687                                                    const CallInst &I,
688                                                    MachineFunction &MF,
689                                                    unsigned Intrinsic) const {
690   switch (Intrinsic) {
691   case Intrinsic::wasm_memory_atomic_notify:
692     Info.opc = ISD::INTRINSIC_W_CHAIN;
693     Info.memVT = MVT::i32;
694     Info.ptrVal = I.getArgOperand(0);
695     Info.offset = 0;
696     Info.align = Align(4);
697     // atomic.notify instruction does not really load the memory specified with
698     // this argument, but MachineMemOperand should either be load or store, so
699     // we set this to a load.
700     // FIXME Volatile isn't really correct, but currently all LLVM atomic
701     // instructions are treated as volatiles in the backend, so we should be
702     // consistent. The same applies for wasm_atomic_wait intrinsics too.
703     Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
704     return true;
705   case Intrinsic::wasm_memory_atomic_wait32:
706     Info.opc = ISD::INTRINSIC_W_CHAIN;
707     Info.memVT = MVT::i32;
708     Info.ptrVal = I.getArgOperand(0);
709     Info.offset = 0;
710     Info.align = Align(4);
711     Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
712     return true;
713   case Intrinsic::wasm_memory_atomic_wait64:
714     Info.opc = ISD::INTRINSIC_W_CHAIN;
715     Info.memVT = MVT::i64;
716     Info.ptrVal = I.getArgOperand(0);
717     Info.offset = 0;
718     Info.align = Align(8);
719     Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
720     return true;
721   case Intrinsic::wasm_load32_zero:
722   case Intrinsic::wasm_load64_zero:
723     Info.opc = ISD::INTRINSIC_W_CHAIN;
724     Info.memVT = Intrinsic == Intrinsic::wasm_load32_zero ? MVT::i32 : MVT::i64;
725     Info.ptrVal = I.getArgOperand(0);
726     Info.offset = 0;
727     Info.align = Align(1);
728     Info.flags = MachineMemOperand::MOLoad;
729     return true;
730   case Intrinsic::wasm_load8_lane:
731   case Intrinsic::wasm_load16_lane:
732   case Intrinsic::wasm_load32_lane:
733   case Intrinsic::wasm_load64_lane:
734   case Intrinsic::wasm_store8_lane:
735   case Intrinsic::wasm_store16_lane:
736   case Intrinsic::wasm_store32_lane:
737   case Intrinsic::wasm_store64_lane: {
738     MVT MemVT;
739     switch (Intrinsic) {
740     case Intrinsic::wasm_load8_lane:
741     case Intrinsic::wasm_store8_lane:
742       MemVT = MVT::i8;
743       break;
744     case Intrinsic::wasm_load16_lane:
745     case Intrinsic::wasm_store16_lane:
746       MemVT = MVT::i16;
747       break;
748     case Intrinsic::wasm_load32_lane:
749     case Intrinsic::wasm_store32_lane:
750       MemVT = MVT::i32;
751       break;
752     case Intrinsic::wasm_load64_lane:
753     case Intrinsic::wasm_store64_lane:
754       MemVT = MVT::i64;
755       break;
756     default:
757       llvm_unreachable("unexpected intrinsic");
758     }
759     if (Intrinsic == Intrinsic::wasm_load8_lane ||
760         Intrinsic == Intrinsic::wasm_load16_lane ||
761         Intrinsic == Intrinsic::wasm_load32_lane ||
762         Intrinsic == Intrinsic::wasm_load64_lane) {
763       Info.opc = ISD::INTRINSIC_W_CHAIN;
764       Info.flags = MachineMemOperand::MOLoad;
765     } else {
766       Info.opc = ISD::INTRINSIC_VOID;
767       Info.flags = MachineMemOperand::MOStore;
768     }
769     Info.ptrVal = I.getArgOperand(0);
770     Info.memVT = MemVT;
771     Info.offset = 0;
772     Info.align = Align(1);
773     return true;
774   }
775   default:
776     return false;
777   }
778 }
779 
780 //===----------------------------------------------------------------------===//
781 // WebAssembly Lowering private implementation.
782 //===----------------------------------------------------------------------===//
783 
784 //===----------------------------------------------------------------------===//
785 // Lowering Code
786 //===----------------------------------------------------------------------===//
787 
788 static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
789   MachineFunction &MF = DAG.getMachineFunction();
790   DAG.getContext()->diagnose(
791       DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
792 }
793 
794 // Test whether the given calling convention is supported.
795 static bool callingConvSupported(CallingConv::ID CallConv) {
796   // We currently support the language-independent target-independent
797   // conventions. We don't yet have a way to annotate calls with properties like
798   // "cold", and we don't have any call-clobbered registers, so these are mostly
799   // all handled the same.
800   return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
801          CallConv == CallingConv::Cold ||
802          CallConv == CallingConv::PreserveMost ||
803          CallConv == CallingConv::PreserveAll ||
804          CallConv == CallingConv::CXX_FAST_TLS ||
805          CallConv == CallingConv::WASM_EmscriptenInvoke ||
806          CallConv == CallingConv::Swift;
807 }
808 
809 SDValue
810 WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
811                                      SmallVectorImpl<SDValue> &InVals) const {
812   SelectionDAG &DAG = CLI.DAG;
813   SDLoc DL = CLI.DL;
814   SDValue Chain = CLI.Chain;
815   SDValue Callee = CLI.Callee;
816   MachineFunction &MF = DAG.getMachineFunction();
817   auto Layout = MF.getDataLayout();
818 
819   CallingConv::ID CallConv = CLI.CallConv;
820   if (!callingConvSupported(CallConv))
821     fail(DL, DAG,
822          "WebAssembly doesn't support language-specific or target-specific "
823          "calling conventions yet");
824   if (CLI.IsPatchPoint)
825     fail(DL, DAG, "WebAssembly doesn't support patch point yet");
826 
827   if (CLI.IsTailCall) {
828     auto NoTail = [&](const char *Msg) {
829       if (CLI.CB && CLI.CB->isMustTailCall())
830         fail(DL, DAG, Msg);
831       CLI.IsTailCall = false;
832     };
833 
834     if (!Subtarget->hasTailCall())
835       NoTail("WebAssembly 'tail-call' feature not enabled");
836 
837     // Varargs calls cannot be tail calls because the buffer is on the stack
838     if (CLI.IsVarArg)
839       NoTail("WebAssembly does not support varargs tail calls");
840 
841     // Do not tail call unless caller and callee return types match
842     const Function &F = MF.getFunction();
843     const TargetMachine &TM = getTargetMachine();
844     Type *RetTy = F.getReturnType();
845     SmallVector<MVT, 4> CallerRetTys;
846     SmallVector<MVT, 4> CalleeRetTys;
847     computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
848     computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
849     bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
850                       std::equal(CallerRetTys.begin(), CallerRetTys.end(),
851                                  CalleeRetTys.begin());
852     if (!TypesMatch)
853       NoTail("WebAssembly tail call requires caller and callee return types to "
854              "match");
855 
856     // If pointers to local stack values are passed, we cannot tail call
857     if (CLI.CB) {
858       for (auto &Arg : CLI.CB->args()) {
859         Value *Val = Arg.get();
860         // Trace the value back through pointer operations
861         while (true) {
862           Value *Src = Val->stripPointerCastsAndAliases();
863           if (auto *GEP = dyn_cast<GetElementPtrInst>(Src))
864             Src = GEP->getPointerOperand();
865           if (Val == Src)
866             break;
867           Val = Src;
868         }
869         if (isa<AllocaInst>(Val)) {
870           NoTail(
871               "WebAssembly does not support tail calling with stack arguments");
872           break;
873         }
874       }
875     }
876   }
877 
878   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
879   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
880   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
881 
882   // The generic code may have added an sret argument. If we're lowering an
883   // invoke function, the ABI requires that the function pointer be the first
884   // argument, so we may have to swap the arguments.
885   if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
886       Outs[0].Flags.isSRet()) {
887     std::swap(Outs[0], Outs[1]);
888     std::swap(OutVals[0], OutVals[1]);
889   }
890 
891   bool HasSwiftSelfArg = false;
892   bool HasSwiftErrorArg = false;
893   unsigned NumFixedArgs = 0;
894   for (unsigned I = 0; I < Outs.size(); ++I) {
895     const ISD::OutputArg &Out = Outs[I];
896     SDValue &OutVal = OutVals[I];
897     HasSwiftSelfArg |= Out.Flags.isSwiftSelf();
898     HasSwiftErrorArg |= Out.Flags.isSwiftError();
899     if (Out.Flags.isNest())
900       fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
901     if (Out.Flags.isInAlloca())
902       fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
903     if (Out.Flags.isInConsecutiveRegs())
904       fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
905     if (Out.Flags.isInConsecutiveRegsLast())
906       fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
907     if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
908       auto &MFI = MF.getFrameInfo();
909       int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
910                                      Out.Flags.getNonZeroByValAlign(),
911                                      /*isSS=*/false);
912       SDValue SizeNode =
913           DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
914       SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
915       Chain = DAG.getMemcpy(
916           Chain, DL, FINode, OutVal, SizeNode, Out.Flags.getNonZeroByValAlign(),
917           /*isVolatile*/ false, /*AlwaysInline=*/false,
918           /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
919       OutVal = FINode;
920     }
921     // Count the number of fixed args *after* legalization.
922     NumFixedArgs += Out.IsFixed;
923   }
924 
925   bool IsVarArg = CLI.IsVarArg;
926   auto PtrVT = getPointerTy(Layout);
927 
928   // For swiftcc, emit additional swiftself and swifterror arguments
929   // if there aren't. These additional arguments are also added for callee
930   // signature They are necessary to match callee and caller signature for
931   // indirect call.
932   if (CallConv == CallingConv::Swift) {
933     if (!HasSwiftSelfArg) {
934       NumFixedArgs++;
935       ISD::OutputArg Arg;
936       Arg.Flags.setSwiftSelf();
937       CLI.Outs.push_back(Arg);
938       SDValue ArgVal = DAG.getUNDEF(PtrVT);
939       CLI.OutVals.push_back(ArgVal);
940     }
941     if (!HasSwiftErrorArg) {
942       NumFixedArgs++;
943       ISD::OutputArg Arg;
944       Arg.Flags.setSwiftError();
945       CLI.Outs.push_back(Arg);
946       SDValue ArgVal = DAG.getUNDEF(PtrVT);
947       CLI.OutVals.push_back(ArgVal);
948     }
949   }
950 
951   // Analyze operands of the call, assigning locations to each operand.
952   SmallVector<CCValAssign, 16> ArgLocs;
953   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
954 
955   if (IsVarArg) {
956     // Outgoing non-fixed arguments are placed in a buffer. First
957     // compute their offsets and the total amount of buffer space needed.
958     for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
959       const ISD::OutputArg &Out = Outs[I];
960       SDValue &Arg = OutVals[I];
961       EVT VT = Arg.getValueType();
962       assert(VT != MVT::iPTR && "Legalized args should be concrete");
963       Type *Ty = VT.getTypeForEVT(*DAG.getContext());
964       Align Alignment =
965           std::max(Out.Flags.getNonZeroOrigAlign(), Layout.getABITypeAlign(Ty));
966       unsigned Offset =
967           CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), Alignment);
968       CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
969                                         Offset, VT.getSimpleVT(),
970                                         CCValAssign::Full));
971     }
972   }
973 
974   unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
975 
976   SDValue FINode;
977   if (IsVarArg && NumBytes) {
978     // For non-fixed arguments, next emit stores to store the argument values
979     // to the stack buffer at the offsets computed above.
980     int FI = MF.getFrameInfo().CreateStackObject(NumBytes,
981                                                  Layout.getStackAlignment(),
982                                                  /*isSS=*/false);
983     unsigned ValNo = 0;
984     SmallVector<SDValue, 8> Chains;
985     for (SDValue Arg : drop_begin(OutVals, NumFixedArgs)) {
986       assert(ArgLocs[ValNo].getValNo() == ValNo &&
987              "ArgLocs should remain in order and only hold varargs args");
988       unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
989       FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
990       SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
991                                 DAG.getConstant(Offset, DL, PtrVT));
992       Chains.push_back(
993           DAG.getStore(Chain, DL, Arg, Add,
994                        MachinePointerInfo::getFixedStack(MF, FI, Offset)));
995     }
996     if (!Chains.empty())
997       Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
998   } else if (IsVarArg) {
999     FINode = DAG.getIntPtrConstant(0, DL);
1000   }
1001 
1002   if (Callee->getOpcode() == ISD::GlobalAddress) {
1003     // If the callee is a GlobalAddress node (quite common, every direct call
1004     // is) turn it into a TargetGlobalAddress node so that LowerGlobalAddress
1005     // doesn't at MO_GOT which is not needed for direct calls.
1006     GlobalAddressSDNode* GA = cast<GlobalAddressSDNode>(Callee);
1007     Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
1008                                         getPointerTy(DAG.getDataLayout()),
1009                                         GA->getOffset());
1010     Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
1011                          getPointerTy(DAG.getDataLayout()), Callee);
1012   }
1013 
1014   // Compute the operands for the CALLn node.
1015   SmallVector<SDValue, 16> Ops;
1016   Ops.push_back(Chain);
1017   Ops.push_back(Callee);
1018 
1019   // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
1020   // isn't reliable.
1021   Ops.append(OutVals.begin(),
1022              IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
1023   // Add a pointer to the vararg buffer.
1024   if (IsVarArg)
1025     Ops.push_back(FINode);
1026 
1027   SmallVector<EVT, 8> InTys;
1028   for (const auto &In : Ins) {
1029     assert(!In.Flags.isByVal() && "byval is not valid for return values");
1030     assert(!In.Flags.isNest() && "nest is not valid for return values");
1031     if (In.Flags.isInAlloca())
1032       fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
1033     if (In.Flags.isInConsecutiveRegs())
1034       fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
1035     if (In.Flags.isInConsecutiveRegsLast())
1036       fail(DL, DAG,
1037            "WebAssembly hasn't implemented cons regs last return values");
1038     // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1039     // registers.
1040     InTys.push_back(In.VT);
1041   }
1042 
1043   if (CLI.IsTailCall) {
1044     // ret_calls do not return values to the current frame
1045     SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1046     return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
1047   }
1048 
1049   InTys.push_back(MVT::Other);
1050   SDVTList InTyList = DAG.getVTList(InTys);
1051   SDValue Res = DAG.getNode(WebAssemblyISD::CALL, DL, InTyList, Ops);
1052 
1053   for (size_t I = 0; I < Ins.size(); ++I)
1054     InVals.push_back(Res.getValue(I));
1055 
1056   // Return the chain
1057   return Res.getValue(Ins.size());
1058 }
1059 
1060 bool WebAssemblyTargetLowering::CanLowerReturn(
1061     CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
1062     const SmallVectorImpl<ISD::OutputArg> &Outs,
1063     LLVMContext & /*Context*/) const {
1064   // WebAssembly can only handle returning tuples with multivalue enabled
1065   return Subtarget->hasMultivalue() || Outs.size() <= 1;
1066 }
1067 
1068 SDValue WebAssemblyTargetLowering::LowerReturn(
1069     SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
1070     const SmallVectorImpl<ISD::OutputArg> &Outs,
1071     const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
1072     SelectionDAG &DAG) const {
1073   assert((Subtarget->hasMultivalue() || Outs.size() <= 1) &&
1074          "MVP WebAssembly can only return up to one value");
1075   if (!callingConvSupported(CallConv))
1076     fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1077 
1078   SmallVector<SDValue, 4> RetOps(1, Chain);
1079   RetOps.append(OutVals.begin(), OutVals.end());
1080   Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);
1081 
1082   // Record the number and types of the return values.
1083   for (const ISD::OutputArg &Out : Outs) {
1084     assert(!Out.Flags.isByVal() && "byval is not valid for return values");
1085     assert(!Out.Flags.isNest() && "nest is not valid for return values");
1086     assert(Out.IsFixed && "non-fixed return value is not valid");
1087     if (Out.Flags.isInAlloca())
1088       fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
1089     if (Out.Flags.isInConsecutiveRegs())
1090       fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
1091     if (Out.Flags.isInConsecutiveRegsLast())
1092       fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
1093   }
1094 
1095   return Chain;
1096 }
1097 
1098 SDValue WebAssemblyTargetLowering::LowerFormalArguments(
1099     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1100     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1101     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1102   if (!callingConvSupported(CallConv))
1103     fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1104 
1105   MachineFunction &MF = DAG.getMachineFunction();
1106   auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();
1107 
1108   // Set up the incoming ARGUMENTS value, which serves to represent the liveness
1109   // of the incoming values before they're represented by virtual registers.
1110   MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);
1111 
1112   bool HasSwiftErrorArg = false;
1113   bool HasSwiftSelfArg = false;
1114   for (const ISD::InputArg &In : Ins) {
1115     HasSwiftSelfArg |= In.Flags.isSwiftSelf();
1116     HasSwiftErrorArg |= In.Flags.isSwiftError();
1117     if (In.Flags.isInAlloca())
1118       fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
1119     if (In.Flags.isNest())
1120       fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
1121     if (In.Flags.isInConsecutiveRegs())
1122       fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
1123     if (In.Flags.isInConsecutiveRegsLast())
1124       fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
1125     // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1126     // registers.
1127     InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
1128                                            DAG.getTargetConstant(InVals.size(),
1129                                                                  DL, MVT::i32))
1130                              : DAG.getUNDEF(In.VT));
1131 
1132     // Record the number and types of arguments.
1133     MFI->addParam(In.VT);
1134   }
1135 
1136   // For swiftcc, emit additional swiftself and swifterror arguments
1137   // if there aren't. These additional arguments are also added for callee
1138   // signature They are necessary to match callee and caller signature for
1139   // indirect call.
1140   auto PtrVT = getPointerTy(MF.getDataLayout());
1141   if (CallConv == CallingConv::Swift) {
1142     if (!HasSwiftSelfArg) {
1143       MFI->addParam(PtrVT);
1144     }
1145     if (!HasSwiftErrorArg) {
1146       MFI->addParam(PtrVT);
1147     }
1148   }
1149   // Varargs are copied into a buffer allocated by the caller, and a pointer to
1150   // the buffer is passed as an argument.
1151   if (IsVarArg) {
1152     MVT PtrVT = getPointerTy(MF.getDataLayout());
1153     Register VarargVreg =
1154         MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrVT));
1155     MFI->setVarargBufferVreg(VarargVreg);
1156     Chain = DAG.getCopyToReg(
1157         Chain, DL, VarargVreg,
1158         DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
1159                     DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
1160     MFI->addParam(PtrVT);
1161   }
1162 
1163   // Record the number and types of arguments and results.
1164   SmallVector<MVT, 4> Params;
1165   SmallVector<MVT, 4> Results;
1166   computeSignatureVTs(MF.getFunction().getFunctionType(), &MF.getFunction(),
1167                       MF.getFunction(), DAG.getTarget(), Params, Results);
1168   for (MVT VT : Results)
1169     MFI->addResult(VT);
1170   // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
1171   // the param logic here with ComputeSignatureVTs
1172   assert(MFI->getParams().size() == Params.size() &&
1173          std::equal(MFI->getParams().begin(), MFI->getParams().end(),
1174                     Params.begin()));
1175 
1176   return Chain;
1177 }
1178 
1179 void WebAssemblyTargetLowering::ReplaceNodeResults(
1180     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
1181   switch (N->getOpcode()) {
1182   case ISD::SIGN_EXTEND_INREG:
1183     // Do not add any results, signifying that N should not be custom lowered
1184     // after all. This happens because simd128 turns on custom lowering for
1185     // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
1186     // illegal type.
1187     break;
1188   default:
1189     llvm_unreachable(
1190         "ReplaceNodeResults not implemented for this op for WebAssembly!");
1191   }
1192 }
1193 
1194 //===----------------------------------------------------------------------===//
1195 //  Custom lowering hooks.
1196 //===----------------------------------------------------------------------===//
1197 
1198 SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
1199                                                   SelectionDAG &DAG) const {
1200   SDLoc DL(Op);
1201   switch (Op.getOpcode()) {
1202   default:
1203     llvm_unreachable("unimplemented operation lowering");
1204     return SDValue();
1205   case ISD::FrameIndex:
1206     return LowerFrameIndex(Op, DAG);
1207   case ISD::GlobalAddress:
1208     return LowerGlobalAddress(Op, DAG);
1209   case ISD::GlobalTLSAddress:
1210     return LowerGlobalTLSAddress(Op, DAG);
1211   case ISD::ExternalSymbol:
1212     return LowerExternalSymbol(Op, DAG);
1213   case ISD::JumpTable:
1214     return LowerJumpTable(Op, DAG);
1215   case ISD::BR_JT:
1216     return LowerBR_JT(Op, DAG);
1217   case ISD::VASTART:
1218     return LowerVASTART(Op, DAG);
1219   case ISD::BlockAddress:
1220   case ISD::BRIND:
1221     fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
1222     return SDValue();
1223   case ISD::RETURNADDR:
1224     return LowerRETURNADDR(Op, DAG);
1225   case ISD::FRAMEADDR:
1226     return LowerFRAMEADDR(Op, DAG);
1227   case ISD::CopyToReg:
1228     return LowerCopyToReg(Op, DAG);
1229   case ISD::EXTRACT_VECTOR_ELT:
1230   case ISD::INSERT_VECTOR_ELT:
1231     return LowerAccessVectorElement(Op, DAG);
1232   case ISD::INTRINSIC_VOID:
1233   case ISD::INTRINSIC_WO_CHAIN:
1234   case ISD::INTRINSIC_W_CHAIN:
1235     return LowerIntrinsic(Op, DAG);
1236   case ISD::SIGN_EXTEND_INREG:
1237     return LowerSIGN_EXTEND_INREG(Op, DAG);
1238   case ISD::BUILD_VECTOR:
1239     return LowerBUILD_VECTOR(Op, DAG);
1240   case ISD::VECTOR_SHUFFLE:
1241     return LowerVECTOR_SHUFFLE(Op, DAG);
1242   case ISD::SETCC:
1243     return LowerSETCC(Op, DAG);
1244   case ISD::SHL:
1245   case ISD::SRA:
1246   case ISD::SRL:
1247     return LowerShift(Op, DAG);
1248   case ISD::FP_TO_SINT_SAT:
1249   case ISD::FP_TO_UINT_SAT:
1250     return LowerFP_TO_INT_SAT(Op, DAG);
1251   }
1252 }
1253 
1254 SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
1255                                                   SelectionDAG &DAG) const {
1256   SDValue Src = Op.getOperand(2);
1257   if (isa<FrameIndexSDNode>(Src.getNode())) {
1258     // CopyToReg nodes don't support FrameIndex operands. Other targets select
1259     // the FI to some LEA-like instruction, but since we don't have that, we
1260     // need to insert some kind of instruction that can take an FI operand and
1261     // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
1262     // local.copy between Op and its FI operand.
1263     SDValue Chain = Op.getOperand(0);
1264     SDLoc DL(Op);
1265     unsigned Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
1266     EVT VT = Src.getValueType();
1267     SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
1268                                                    : WebAssembly::COPY_I64,
1269                                     DL, VT, Src),
1270                  0);
1271     return Op.getNode()->getNumValues() == 1
1272                ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
1273                : DAG.getCopyToReg(Chain, DL, Reg, Copy,
1274                                   Op.getNumOperands() == 4 ? Op.getOperand(3)
1275                                                            : SDValue());
1276   }
1277   return SDValue();
1278 }
1279 
1280 SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
1281                                                    SelectionDAG &DAG) const {
1282   int FI = cast<FrameIndexSDNode>(Op)->getIndex();
1283   return DAG.getTargetFrameIndex(FI, Op.getValueType());
1284 }
1285 
1286 SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
1287                                                    SelectionDAG &DAG) const {
1288   SDLoc DL(Op);
1289 
1290   if (!Subtarget->getTargetTriple().isOSEmscripten()) {
1291     fail(DL, DAG,
1292          "Non-Emscripten WebAssembly hasn't implemented "
1293          "__builtin_return_address");
1294     return SDValue();
1295   }
1296 
1297   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
1298     return SDValue();
1299 
1300   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1301   MakeLibCallOptions CallOptions;
1302   return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
1303                      {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
1304       .first;
1305 }
1306 
1307 SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
1308                                                   SelectionDAG &DAG) const {
1309   // Non-zero depths are not supported by WebAssembly currently. Use the
1310   // legalizer's default expansion, which is to return 0 (what this function is
1311   // documented to do).
1312   if (Op.getConstantOperandVal(0) > 0)
1313     return SDValue();
1314 
1315   DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
1316   EVT VT = Op.getValueType();
1317   Register FP =
1318       Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
1319   return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
1320 }
1321 
1322 SDValue
1323 WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
1324                                                  SelectionDAG &DAG) const {
1325   SDLoc DL(Op);
1326   const auto *GA = cast<GlobalAddressSDNode>(Op);
1327   MVT PtrVT = getPointerTy(DAG.getDataLayout());
1328 
1329   MachineFunction &MF = DAG.getMachineFunction();
1330   if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
1331     report_fatal_error("cannot use thread-local storage without bulk memory",
1332                        false);
1333 
1334   const GlobalValue *GV = GA->getGlobal();
1335 
1336   // Currently Emscripten does not support dynamic linking with threads.
1337   // Therefore, if we have thread-local storage, only the local-exec model
1338   // is possible.
1339   // TODO: remove this and implement proper TLS models once Emscripten
1340   // supports dynamic linking with threads.
1341   if (GV->getThreadLocalMode() != GlobalValue::LocalExecTLSModel &&
1342       !Subtarget->getTargetTriple().isOSEmscripten()) {
1343     report_fatal_error("only -ftls-model=local-exec is supported for now on "
1344                        "non-Emscripten OSes: variable " +
1345                            GV->getName(),
1346                        false);
1347   }
1348 
1349   auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
1350                                      : WebAssembly::GLOBAL_GET_I32;
1351   const char *BaseName = MF.createExternalSymbolName("__tls_base");
1352 
1353   SDValue BaseAddr(
1354       DAG.getMachineNode(GlobalGet, DL, PtrVT,
1355                          DAG.getTargetExternalSymbol(BaseName, PtrVT)),
1356       0);
1357 
1358   SDValue TLSOffset = DAG.getTargetGlobalAddress(
1359       GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL);
1360   SDValue SymAddr = DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, TLSOffset);
1361 
1362   return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymAddr);
1363 }
1364 
1365 SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
1366                                                       SelectionDAG &DAG) const {
1367   SDLoc DL(Op);
1368   const auto *GA = cast<GlobalAddressSDNode>(Op);
1369   EVT VT = Op.getValueType();
1370   assert(GA->getTargetFlags() == 0 &&
1371          "Unexpected target flags on generic GlobalAddressSDNode");
1372   if (GA->getAddressSpace() != 0)
1373     fail(DL, DAG, "WebAssembly only expects the 0 address space");
1374 
1375   unsigned OperandFlags = 0;
1376   if (isPositionIndependent()) {
1377     const GlobalValue *GV = GA->getGlobal();
1378     if (getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) {
1379       MachineFunction &MF = DAG.getMachineFunction();
1380       MVT PtrVT = getPointerTy(MF.getDataLayout());
1381       const char *BaseName;
1382       if (GV->getValueType()->isFunctionTy()) {
1383         BaseName = MF.createExternalSymbolName("__table_base");
1384         OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL;
1385       }
1386       else {
1387         BaseName = MF.createExternalSymbolName("__memory_base");
1388         OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL;
1389       }
1390       SDValue BaseAddr =
1391           DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
1392                       DAG.getTargetExternalSymbol(BaseName, PtrVT));
1393 
1394       SDValue SymAddr = DAG.getNode(
1395           WebAssemblyISD::WrapperPIC, DL, VT,
1396           DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
1397                                      OperandFlags));
1398 
1399       return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
1400     } else {
1401       OperandFlags = WebAssemblyII::MO_GOT;
1402     }
1403   }
1404 
1405   return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
1406                      DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
1407                                                 GA->getOffset(), OperandFlags));
1408 }
1409 
1410 SDValue
1411 WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
1412                                                SelectionDAG &DAG) const {
1413   SDLoc DL(Op);
1414   const auto *ES = cast<ExternalSymbolSDNode>(Op);
1415   EVT VT = Op.getValueType();
1416   assert(ES->getTargetFlags() == 0 &&
1417          "Unexpected target flags on generic ExternalSymbolSDNode");
1418   return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
1419                      DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
1420 }
1421 
1422 SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
1423                                                   SelectionDAG &DAG) const {
1424   // There's no need for a Wrapper node because we always incorporate a jump
1425   // table operand into a BR_TABLE instruction, rather than ever
1426   // materializing it in a register.
1427   const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
1428   return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
1429                                 JT->getTargetFlags());
1430 }
1431 
1432 SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
1433                                               SelectionDAG &DAG) const {
1434   SDLoc DL(Op);
1435   SDValue Chain = Op.getOperand(0);
1436   const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
1437   SDValue Index = Op.getOperand(2);
1438   assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
1439 
1440   SmallVector<SDValue, 8> Ops;
1441   Ops.push_back(Chain);
1442   Ops.push_back(Index);
1443 
1444   MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
1445   const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
1446 
1447   // Add an operand for each case.
1448   for (auto MBB : MBBs)
1449     Ops.push_back(DAG.getBasicBlock(MBB));
1450 
1451   // Add the first MBB as a dummy default target for now. This will be replaced
1452   // with the proper default target (and the preceding range check eliminated)
1453   // if possible by WebAssemblyFixBrTableDefaults.
1454   Ops.push_back(DAG.getBasicBlock(*MBBs.begin()));
1455   return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
1456 }
1457 
1458 SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
1459                                                 SelectionDAG &DAG) const {
1460   SDLoc DL(Op);
1461   EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());
1462 
1463   auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
1464   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1465 
1466   SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
1467                                     MFI->getVarargBufferVreg(), PtrVT);
1468   return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
1469                       MachinePointerInfo(SV));
1470 }
1471 
1472 static SDValue getCppExceptionSymNode(SDValue Op, unsigned TagIndex,
1473                                       SelectionDAG &DAG) {
1474   // We only support C++ exceptions for now
1475   int Tag =
1476       cast<ConstantSDNode>(Op.getOperand(TagIndex).getNode())->getZExtValue();
1477   if (Tag != WebAssembly::CPP_EXCEPTION)
1478     llvm_unreachable("Invalid tag: We only support C++ exceptions for now");
1479   auto &MF = DAG.getMachineFunction();
1480   const auto &TLI = DAG.getTargetLoweringInfo();
1481   MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
1482   const char *SymName = MF.createExternalSymbolName("__cpp_exception");
1483   return DAG.getNode(WebAssemblyISD::Wrapper, SDLoc(Op), PtrVT,
1484                      DAG.getTargetExternalSymbol(SymName, PtrVT));
1485 }
1486 
1487 SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
1488                                                   SelectionDAG &DAG) const {
1489   MachineFunction &MF = DAG.getMachineFunction();
1490   unsigned IntNo;
1491   switch (Op.getOpcode()) {
1492   case ISD::INTRINSIC_VOID:
1493   case ISD::INTRINSIC_W_CHAIN:
1494     IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1495     break;
1496   case ISD::INTRINSIC_WO_CHAIN:
1497     IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1498     break;
1499   default:
1500     llvm_unreachable("Invalid intrinsic");
1501   }
1502   SDLoc DL(Op);
1503 
1504   switch (IntNo) {
1505   default:
1506     return SDValue(); // Don't custom lower most intrinsics.
1507 
1508   case Intrinsic::wasm_lsda: {
1509     EVT VT = Op.getValueType();
1510     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1511     MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
1512     auto &Context = MF.getMMI().getContext();
1513     MCSymbol *S = Context.getOrCreateSymbol(Twine("GCC_except_table") +
1514                                             Twine(MF.getFunctionNumber()));
1515     return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
1516                        DAG.getMCSymbol(S, PtrVT));
1517   }
1518 
1519   case Intrinsic::wasm_throw: {
1520     SDValue SymNode = getCppExceptionSymNode(Op, 2, DAG);
1521     return DAG.getNode(WebAssemblyISD::THROW, DL,
1522                        MVT::Other, // outchain type
1523                        {
1524                            Op.getOperand(0), // inchain
1525                            SymNode,          // exception symbol
1526                            Op.getOperand(3)  // thrown value
1527                        });
1528   }
1529 
1530   case Intrinsic::wasm_catch: {
1531     SDValue SymNode = getCppExceptionSymNode(Op, 2, DAG);
1532     return DAG.getNode(WebAssemblyISD::CATCH, DL,
1533                        {
1534                            MVT::i32,  // outchain type
1535                            MVT::Other // return value
1536                        },
1537                        {
1538                            Op.getOperand(0), // inchain
1539                            SymNode           // exception symbol
1540                        });
1541   }
1542 
1543   case Intrinsic::wasm_shuffle: {
1544     // Drop in-chain and replace undefs, but otherwise pass through unchanged
1545     SDValue Ops[18];
1546     size_t OpIdx = 0;
1547     Ops[OpIdx++] = Op.getOperand(1);
1548     Ops[OpIdx++] = Op.getOperand(2);
1549     while (OpIdx < 18) {
1550       const SDValue &MaskIdx = Op.getOperand(OpIdx + 1);
1551       if (MaskIdx.isUndef() ||
1552           cast<ConstantSDNode>(MaskIdx.getNode())->getZExtValue() >= 32) {
1553         Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32);
1554       } else {
1555         Ops[OpIdx++] = MaskIdx;
1556       }
1557     }
1558     return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
1559   }
1560   }
1561 }
1562 
1563 SDValue
1564 WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
1565                                                   SelectionDAG &DAG) const {
1566   SDLoc DL(Op);
1567   // If sign extension operations are disabled, allow sext_inreg only if operand
1568   // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign
1569   // extension operations, but allowing sext_inreg in this context lets us have
1570   // simple patterns to select extract_lane_s instructions. Expanding sext_inreg
1571   // everywhere would be simpler in this file, but would necessitate large and
1572   // brittle patterns to undo the expansion and select extract_lane_s
1573   // instructions.
1574   assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
1575   if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
1576     return SDValue();
1577 
1578   const SDValue &Extract = Op.getOperand(0);
1579   MVT VecT = Extract.getOperand(0).getSimpleValueType();
1580   if (VecT.getVectorElementType().getSizeInBits() > 32)
1581     return SDValue();
1582   MVT ExtractedLaneT =
1583       cast<VTSDNode>(Op.getOperand(1).getNode())->getVT().getSimpleVT();
1584   MVT ExtractedVecT =
1585       MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
1586   if (ExtractedVecT == VecT)
1587     return Op;
1588 
1589   // Bitcast vector to appropriate type to ensure ISel pattern coverage
1590   const SDNode *Index = Extract.getOperand(1).getNode();
1591   if (!isa<ConstantSDNode>(Index))
1592     return SDValue();
1593   unsigned IndexVal = cast<ConstantSDNode>(Index)->getZExtValue();
1594   unsigned Scale =
1595       ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
1596   assert(Scale > 1);
1597   SDValue NewIndex =
1598       DAG.getConstant(IndexVal * Scale, DL, Index->getValueType(0));
1599   SDValue NewExtract = DAG.getNode(
1600       ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(),
1601       DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
1602   return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract,
1603                      Op.getOperand(1));
1604 }
1605 
1606 SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
1607                                                      SelectionDAG &DAG) const {
1608   SDLoc DL(Op);
1609   const EVT VecT = Op.getValueType();
1610   const EVT LaneT = Op.getOperand(0).getValueType();
1611   const size_t Lanes = Op.getNumOperands();
1612   bool CanSwizzle = VecT == MVT::v16i8;
1613 
1614   // BUILD_VECTORs are lowered to the instruction that initializes the highest
1615   // possible number of lanes at once followed by a sequence of replace_lane
1616   // instructions to individually initialize any remaining lanes.
1617 
1618   // TODO: Tune this. For example, lanewise swizzling is very expensive, so
1619   // swizzled lanes should be given greater weight.
1620 
1621   // TODO: Investigate looping rather than always extracting/replacing specific
1622   // lanes to fill gaps.
1623 
1624   auto IsConstant = [](const SDValue &V) {
1625     return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
1626   };
1627 
1628   // Returns the source vector and index vector pair if they exist. Checks for:
1629   //   (extract_vector_elt
1630   //     $src,
1631   //     (sign_extend_inreg (extract_vector_elt $indices, $i))
1632   //   )
1633   auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
1634     auto Bail = std::make_pair(SDValue(), SDValue());
1635     if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
1636       return Bail;
1637     const SDValue &SwizzleSrc = Lane->getOperand(0);
1638     const SDValue &IndexExt = Lane->getOperand(1);
1639     if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
1640       return Bail;
1641     const SDValue &Index = IndexExt->getOperand(0);
1642     if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
1643       return Bail;
1644     const SDValue &SwizzleIndices = Index->getOperand(0);
1645     if (SwizzleSrc.getValueType() != MVT::v16i8 ||
1646         SwizzleIndices.getValueType() != MVT::v16i8 ||
1647         Index->getOperand(1)->getOpcode() != ISD::Constant ||
1648         Index->getConstantOperandVal(1) != I)
1649       return Bail;
1650     return std::make_pair(SwizzleSrc, SwizzleIndices);
1651   };
1652 
1653   // If the lane is extracted from another vector at a constant index, return
1654   // that vector. The source vector must not have more lanes than the dest
1655   // because the shufflevector indices are in terms of the destination lanes and
1656   // would not be able to address the smaller individual source lanes.
1657   auto GetShuffleSrc = [&](const SDValue &Lane) {
1658     if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
1659       return SDValue();
1660     if (!isa<ConstantSDNode>(Lane->getOperand(1).getNode()))
1661       return SDValue();
1662     if (Lane->getOperand(0).getValueType().getVectorNumElements() >
1663         VecT.getVectorNumElements())
1664       return SDValue();
1665     return Lane->getOperand(0);
1666   };
1667 
1668   using ValueEntry = std::pair<SDValue, size_t>;
1669   SmallVector<ValueEntry, 16> SplatValueCounts;
1670 
1671   using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
1672   SmallVector<SwizzleEntry, 16> SwizzleCounts;
1673 
1674   using ShuffleEntry = std::pair<SDValue, size_t>;
1675   SmallVector<ShuffleEntry, 16> ShuffleCounts;
1676 
1677   auto AddCount = [](auto &Counts, const auto &Val) {
1678     auto CountIt =
1679         llvm::find_if(Counts, [&Val](auto E) { return E.first == Val; });
1680     if (CountIt == Counts.end()) {
1681       Counts.emplace_back(Val, 1);
1682     } else {
1683       CountIt->second++;
1684     }
1685   };
1686 
1687   auto GetMostCommon = [](auto &Counts) {
1688     auto CommonIt =
1689         std::max_element(Counts.begin(), Counts.end(),
1690                          [](auto A, auto B) { return A.second < B.second; });
1691     assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
1692     return *CommonIt;
1693   };
1694 
1695   size_t NumConstantLanes = 0;
1696 
1697   // Count eligible lanes for each type of vector creation op
1698   for (size_t I = 0; I < Lanes; ++I) {
1699     const SDValue &Lane = Op->getOperand(I);
1700     if (Lane.isUndef())
1701       continue;
1702 
1703     AddCount(SplatValueCounts, Lane);
1704 
1705     if (IsConstant(Lane))
1706       NumConstantLanes++;
1707     if (auto ShuffleSrc = GetShuffleSrc(Lane))
1708       AddCount(ShuffleCounts, ShuffleSrc);
1709     if (CanSwizzle) {
1710       auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
1711       if (SwizzleSrcs.first)
1712         AddCount(SwizzleCounts, SwizzleSrcs);
1713     }
1714   }
1715 
1716   SDValue SplatValue;
1717   size_t NumSplatLanes;
1718   std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);
1719 
1720   SDValue SwizzleSrc;
1721   SDValue SwizzleIndices;
1722   size_t NumSwizzleLanes = 0;
1723   if (SwizzleCounts.size())
1724     std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
1725                           NumSwizzleLanes) = GetMostCommon(SwizzleCounts);
1726 
1727   // Shuffles can draw from up to two vectors, so find the two most common
1728   // sources.
1729   SDValue ShuffleSrc1, ShuffleSrc2;
1730   size_t NumShuffleLanes = 0;
1731   if (ShuffleCounts.size()) {
1732     std::tie(ShuffleSrc1, NumShuffleLanes) = GetMostCommon(ShuffleCounts);
1733     ShuffleCounts.erase(std::remove_if(ShuffleCounts.begin(),
1734                                        ShuffleCounts.end(),
1735                                        [&](const auto &Pair) {
1736                                          return Pair.first == ShuffleSrc1;
1737                                        }),
1738                         ShuffleCounts.end());
1739   }
1740   if (ShuffleCounts.size()) {
1741     size_t AdditionalShuffleLanes;
1742     std::tie(ShuffleSrc2, AdditionalShuffleLanes) =
1743         GetMostCommon(ShuffleCounts);
1744     NumShuffleLanes += AdditionalShuffleLanes;
1745   }
1746 
1747   // Predicate returning true if the lane is properly initialized by the
1748   // original instruction
1749   std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
1750   SDValue Result;
1751   // Prefer swizzles over shuffles over vector consts over splats
1752   if (NumSwizzleLanes >= NumShuffleLanes &&
1753       NumSwizzleLanes >= NumConstantLanes && NumSwizzleLanes >= NumSplatLanes) {
1754     Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
1755                          SwizzleIndices);
1756     auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
1757     IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
1758       return Swizzled == GetSwizzleSrcs(I, Lane);
1759     };
1760   } else if (NumShuffleLanes >= NumConstantLanes &&
1761              NumShuffleLanes >= NumSplatLanes) {
1762     size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits() / 8;
1763     size_t DestLaneCount = VecT.getVectorNumElements();
1764     size_t Scale1 = 1;
1765     size_t Scale2 = 1;
1766     SDValue Src1 = ShuffleSrc1;
1767     SDValue Src2 = ShuffleSrc2 ? ShuffleSrc2 : DAG.getUNDEF(VecT);
1768     if (Src1.getValueType() != VecT) {
1769       size_t LaneSize =
1770           Src1.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
1771       assert(LaneSize > DestLaneSize);
1772       Scale1 = LaneSize / DestLaneSize;
1773       Src1 = DAG.getBitcast(VecT, Src1);
1774     }
1775     if (Src2.getValueType() != VecT) {
1776       size_t LaneSize =
1777           Src2.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
1778       assert(LaneSize > DestLaneSize);
1779       Scale2 = LaneSize / DestLaneSize;
1780       Src2 = DAG.getBitcast(VecT, Src2);
1781     }
1782 
1783     int Mask[16];
1784     assert(DestLaneCount <= 16);
1785     for (size_t I = 0; I < DestLaneCount; ++I) {
1786       const SDValue &Lane = Op->getOperand(I);
1787       SDValue Src = GetShuffleSrc(Lane);
1788       if (Src == ShuffleSrc1) {
1789         Mask[I] = Lane->getConstantOperandVal(1) * Scale1;
1790       } else if (Src && Src == ShuffleSrc2) {
1791         Mask[I] = DestLaneCount + Lane->getConstantOperandVal(1) * Scale2;
1792       } else {
1793         Mask[I] = -1;
1794       }
1795     }
1796     ArrayRef<int> MaskRef(Mask, DestLaneCount);
1797     Result = DAG.getVectorShuffle(VecT, DL, Src1, Src2, MaskRef);
1798     IsLaneConstructed = [&](size_t, const SDValue &Lane) {
1799       auto Src = GetShuffleSrc(Lane);
1800       return Src == ShuffleSrc1 || (Src && Src == ShuffleSrc2);
1801     };
1802   } else if (NumConstantLanes >= NumSplatLanes) {
1803     SmallVector<SDValue, 16> ConstLanes;
1804     for (const SDValue &Lane : Op->op_values()) {
1805       if (IsConstant(Lane)) {
1806         ConstLanes.push_back(Lane);
1807       } else if (LaneT.isFloatingPoint()) {
1808         ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
1809       } else {
1810         ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
1811       }
1812     }
1813     Result = DAG.getBuildVector(VecT, DL, ConstLanes);
1814     IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
1815       return IsConstant(Lane);
1816     };
1817   } else {
1818     // Use a splat, but possibly a load_splat
1819     LoadSDNode *SplattedLoad;
1820     if ((SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) &&
1821         SplattedLoad->getMemoryVT() == VecT.getVectorElementType()) {
1822       Result = DAG.getMemIntrinsicNode(
1823           WebAssemblyISD::LOAD_SPLAT, DL, DAG.getVTList(VecT),
1824           {SplattedLoad->getChain(), SplattedLoad->getBasePtr(),
1825            SplattedLoad->getOffset()},
1826           SplattedLoad->getMemoryVT(), SplattedLoad->getMemOperand());
1827     } else {
1828       Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
1829     }
1830     IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
1831       return Lane == SplatValue;
1832     };
1833   }
1834 
1835   assert(Result);
1836   assert(IsLaneConstructed);
1837 
1838   // Add replace_lane instructions for any unhandled values
1839   for (size_t I = 0; I < Lanes; ++I) {
1840     const SDValue &Lane = Op->getOperand(I);
1841     if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
1842       Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
1843                            DAG.getConstant(I, DL, MVT::i32));
1844   }
1845 
1846   return Result;
1847 }
1848 
1849 SDValue
1850 WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
1851                                                SelectionDAG &DAG) const {
1852   SDLoc DL(Op);
1853   ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
1854   MVT VecType = Op.getOperand(0).getSimpleValueType();
1855   assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
1856   size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;
1857 
1858   // Space for two vector args and sixteen mask indices
1859   SDValue Ops[18];
1860   size_t OpIdx = 0;
1861   Ops[OpIdx++] = Op.getOperand(0);
1862   Ops[OpIdx++] = Op.getOperand(1);
1863 
1864   // Expand mask indices to byte indices and materialize them as operands
1865   for (int M : Mask) {
1866     for (size_t J = 0; J < LaneBytes; ++J) {
1867       // Lower undefs (represented by -1 in mask) to zero
1868       uint64_t ByteIndex = M == -1 ? 0 : (uint64_t)M * LaneBytes + J;
1869       Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
1870     }
1871   }
1872 
1873   return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
1874 }
1875 
1876 SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
1877                                               SelectionDAG &DAG) const {
1878   SDLoc DL(Op);
1879   // The legalizer does not know how to expand the unsupported comparison modes
1880   // of i64x2 vectors, so we manually unroll them here.
1881   assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
1882   SmallVector<SDValue, 2> LHS, RHS;
1883   DAG.ExtractVectorElements(Op->getOperand(0), LHS);
1884   DAG.ExtractVectorElements(Op->getOperand(1), RHS);
1885   const SDValue &CC = Op->getOperand(2);
1886   auto MakeLane = [&](unsigned I) {
1887     return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I],
1888                        DAG.getConstant(uint64_t(-1), DL, MVT::i64),
1889                        DAG.getConstant(uint64_t(0), DL, MVT::i64), CC);
1890   };
1891   return DAG.getBuildVector(Op->getValueType(0), DL,
1892                             {MakeLane(0), MakeLane(1)});
1893 }
1894 
1895 SDValue
1896 WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
1897                                                     SelectionDAG &DAG) const {
1898   // Allow constant lane indices, expand variable lane indices
1899   SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
1900   if (isa<ConstantSDNode>(IdxNode) || IdxNode->isUndef())
1901     return Op;
1902   else
1903     // Perform default expansion
1904     return SDValue();
1905 }
1906 
1907 static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
1908   EVT LaneT = Op.getSimpleValueType().getVectorElementType();
1909   // 32-bit and 64-bit unrolled shifts will have proper semantics
1910   if (LaneT.bitsGE(MVT::i32))
1911     return DAG.UnrollVectorOp(Op.getNode());
1912   // Otherwise mask the shift value to get proper semantics from 32-bit shift
1913   SDLoc DL(Op);
1914   size_t NumLanes = Op.getSimpleValueType().getVectorNumElements();
1915   SDValue Mask = DAG.getConstant(LaneT.getSizeInBits() - 1, DL, MVT::i32);
1916   unsigned ShiftOpcode = Op.getOpcode();
1917   SmallVector<SDValue, 16> ShiftedElements;
1918   DAG.ExtractVectorElements(Op.getOperand(0), ShiftedElements, 0, 0, MVT::i32);
1919   SmallVector<SDValue, 16> ShiftElements;
1920   DAG.ExtractVectorElements(Op.getOperand(1), ShiftElements, 0, 0, MVT::i32);
1921   SmallVector<SDValue, 16> UnrolledOps;
1922   for (size_t i = 0; i < NumLanes; ++i) {
1923     SDValue MaskedShiftValue =
1924         DAG.getNode(ISD::AND, DL, MVT::i32, ShiftElements[i], Mask);
1925     SDValue ShiftedValue = ShiftedElements[i];
1926     if (ShiftOpcode == ISD::SRA)
1927       ShiftedValue = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32,
1928                                  ShiftedValue, DAG.getValueType(LaneT));
1929     UnrolledOps.push_back(
1930         DAG.getNode(ShiftOpcode, DL, MVT::i32, ShiftedValue, MaskedShiftValue));
1931   }
1932   return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps);
1933 }
1934 
1935 SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
1936                                               SelectionDAG &DAG) const {
1937   SDLoc DL(Op);
1938 
1939   // Only manually lower vector shifts
1940   assert(Op.getSimpleValueType().isVector());
1941 
1942   auto ShiftVal = DAG.getSplatValue(Op.getOperand(1));
1943   if (!ShiftVal)
1944     return unrollVectorShift(Op, DAG);
1945 
1946   // Use anyext because none of the high bits can affect the shift
1947   ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32);
1948 
1949   unsigned Opcode;
1950   switch (Op.getOpcode()) {
1951   case ISD::SHL:
1952     Opcode = WebAssemblyISD::VEC_SHL;
1953     break;
1954   case ISD::SRA:
1955     Opcode = WebAssemblyISD::VEC_SHR_S;
1956     break;
1957   case ISD::SRL:
1958     Opcode = WebAssemblyISD::VEC_SHR_U;
1959     break;
1960   default:
1961     llvm_unreachable("unexpected opcode");
1962   }
1963 
1964   return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal);
1965 }
1966 
1967 SDValue WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
1968                                                       SelectionDAG &DAG) const {
1969   SDLoc DL(Op);
1970   EVT ResT = Op.getValueType();
1971   EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
1972 
1973   if ((ResT == MVT::i32 || ResT == MVT::i64) &&
1974       (SatVT == MVT::i32 || SatVT == MVT::i64))
1975     return Op;
1976 
1977   if (ResT == MVT::v4i32 && SatVT == MVT::i32)
1978     return Op;
1979 
1980   return SDValue();
1981 }
1982 
1983 //===----------------------------------------------------------------------===//
1984 //   Custom DAG combine hooks
1985 //===----------------------------------------------------------------------===//
1986 static SDValue
1987 performVECTOR_SHUFFLECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
1988   auto &DAG = DCI.DAG;
1989   auto Shuffle = cast<ShuffleVectorSDNode>(N);
1990 
1991   // Hoist vector bitcasts that don't change the number of lanes out of unary
1992   // shuffles, where they are less likely to get in the way of other combines.
1993   // (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) ->
1994   //  (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask))))
1995   SDValue Bitcast = N->getOperand(0);
1996   if (Bitcast.getOpcode() != ISD::BITCAST)
1997     return SDValue();
1998   if (!N->getOperand(1).isUndef())
1999     return SDValue();
2000   SDValue CastOp = Bitcast.getOperand(0);
2001   MVT SrcType = CastOp.getSimpleValueType();
2002   MVT DstType = Bitcast.getSimpleValueType();
2003   if (!SrcType.is128BitVector() ||
2004       SrcType.getVectorNumElements() != DstType.getVectorNumElements())
2005     return SDValue();
2006   SDValue NewShuffle = DAG.getVectorShuffle(
2007       SrcType, SDLoc(N), CastOp, DAG.getUNDEF(SrcType), Shuffle->getMask());
2008   return DAG.getBitcast(DstType, NewShuffle);
2009 }
2010 
2011 static SDValue
2012 performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
2013   auto &DAG = DCI.DAG;
2014   assert(N->getOpcode() == ISD::SIGN_EXTEND ||
2015          N->getOpcode() == ISD::ZERO_EXTEND);
2016 
2017   // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if
2018   // possible before the extract_subvector can be expanded.
2019   auto Extract = N->getOperand(0);
2020   if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
2021     return SDValue();
2022   auto Source = Extract.getOperand(0);
2023   auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
2024   if (IndexNode == nullptr)
2025     return SDValue();
2026   auto Index = IndexNode->getZExtValue();
2027 
2028   // Only v8i8, v4i16, and v2i32 extracts can be widened, and only if the
2029   // extracted subvector is the low or high half of its source.
2030   EVT ResVT = N->getValueType(0);
2031   if (ResVT == MVT::v8i16) {
2032     if (Extract.getValueType() != MVT::v8i8 ||
2033         Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8))
2034       return SDValue();
2035   } else if (ResVT == MVT::v4i32) {
2036     if (Extract.getValueType() != MVT::v4i16 ||
2037         Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4))
2038       return SDValue();
2039   } else if (ResVT == MVT::v2i64) {
2040     if (Extract.getValueType() != MVT::v2i32 ||
2041         Source.getValueType() != MVT::v4i32 || (Index != 0 && Index != 2))
2042       return SDValue();
2043   } else {
2044     return SDValue();
2045   }
2046 
2047   bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND;
2048   bool IsLow = Index == 0;
2049 
2050   unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::EXTEND_LOW_S
2051                                 : WebAssemblyISD::EXTEND_HIGH_S)
2052                        : (IsLow ? WebAssemblyISD::EXTEND_LOW_U
2053                                 : WebAssemblyISD::EXTEND_HIGH_U);
2054 
2055   return DAG.getNode(Op, SDLoc(N), ResVT, Source);
2056 }
2057 
2058 static SDValue
2059 performVectorConvertLowCombine(SDNode *N,
2060                                TargetLowering::DAGCombinerInfo &DCI) {
2061   auto &DAG = DCI.DAG;
2062 
2063   EVT ResVT = N->getValueType(0);
2064   if (ResVT != MVT::v2f64)
2065     return SDValue();
2066 
2067   if (N->getOpcode() == ISD::SINT_TO_FP || N->getOpcode() == ISD::UINT_TO_FP) {
2068     // Combine this:
2069     //
2070     //   (v2f64 ({s,u}int_to_fp
2071     //     (v2i32 (extract_subvector (v4i32 $x), 0))))
2072     //
2073     // into (f64x2.convert_low_i32x4_{s,u} $x).
2074     auto Extract = N->getOperand(0);
2075     if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
2076       return SDValue();
2077     if (Extract.getValueType() != MVT::v2i32)
2078       return SDValue();
2079     auto Source = Extract.getOperand(0);
2080     if (Source.getValueType() != MVT::v4i32)
2081       return SDValue();
2082     auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
2083     if (IndexNode == nullptr || IndexNode->getZExtValue() != 0)
2084       return SDValue();
2085 
2086     unsigned Op = N->getOpcode() == ISD::SINT_TO_FP
2087                       ? WebAssemblyISD::CONVERT_LOW_S
2088                       : WebAssemblyISD::CONVERT_LOW_U;
2089 
2090     return DAG.getNode(Op, SDLoc(N), ResVT, Source);
2091 
2092   } else if (N->getOpcode() == ISD::EXTRACT_SUBVECTOR) {
2093     // Combine this:
2094     //
2095     //   (v2f64 (extract_subvector
2096     //     (v4f64 ({s,u}int_to_fp (v4i32 $x))), 0))
2097     //
2098     // into (f64x2.convert_low_i32x4_{s,u} $x).
2099     auto IntToFP = N->getOperand(0);
2100     if (IntToFP.getOpcode() != ISD::SINT_TO_FP &&
2101         IntToFP.getOpcode() != ISD::UINT_TO_FP)
2102       return SDValue();
2103     if (IntToFP.getValueType() != MVT::v4f64)
2104       return SDValue();
2105     auto Source = IntToFP.getOperand(0);
2106     if (Source.getValueType() != MVT::v4i32)
2107       return SDValue();
2108     auto IndexNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
2109     if (IndexNode == nullptr || IndexNode->getZExtValue() != 0)
2110       return SDValue();
2111 
2112     unsigned Op = IntToFP->getOpcode() == ISD::SINT_TO_FP
2113                       ? WebAssemblyISD::CONVERT_LOW_S
2114                       : WebAssemblyISD::CONVERT_LOW_U;
2115 
2116     return DAG.getNode(Op, SDLoc(N), ResVT, Source);
2117 
2118   } else {
2119     llvm_unreachable("unexpected opcode");
2120   }
2121 }
2122 
2123 static SDValue
2124 performVectorTruncSatLowCombine(SDNode *N,
2125                                 TargetLowering::DAGCombinerInfo &DCI) {
2126   auto &DAG = DCI.DAG;
2127   assert(N->getOpcode() == ISD::CONCAT_VECTORS);
2128 
2129   // Combine this:
2130   //
2131   //   (concat_vectors (v2i32 (fp_to_{s,u}int_sat $x, 32)), (v2i32 (splat 0)))
2132   //
2133   // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
2134   EVT ResVT = N->getValueType(0);
2135   if (ResVT != MVT::v4i32)
2136     return SDValue();
2137 
2138   auto FPToInt = N->getOperand(0);
2139   auto FPToIntOp = FPToInt.getOpcode();
2140   if (FPToIntOp != ISD::FP_TO_SINT_SAT && FPToIntOp != ISD::FP_TO_UINT_SAT)
2141     return SDValue();
2142   if (cast<VTSDNode>(FPToInt.getOperand(1))->getVT() != MVT::i32)
2143     return SDValue();
2144 
2145   auto Source = FPToInt.getOperand(0);
2146   if (Source.getValueType() != MVT::v2f64)
2147     return SDValue();
2148 
2149   auto *Splat = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
2150   APInt SplatValue, SplatUndef;
2151   unsigned SplatBitSize;
2152   bool HasAnyUndefs;
2153   if (!Splat || !Splat->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
2154                                         HasAnyUndefs))
2155     return SDValue();
2156   if (SplatValue != 0)
2157     return SDValue();
2158 
2159   unsigned Op = FPToIntOp == ISD::FP_TO_SINT_SAT
2160                     ? WebAssemblyISD::TRUNC_SAT_ZERO_S
2161                     : WebAssemblyISD::TRUNC_SAT_ZERO_U;
2162 
2163   return DAG.getNode(Op, SDLoc(N), ResVT, Source);
2164 }
2165 
2166 SDValue
2167 WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
2168                                              DAGCombinerInfo &DCI) const {
2169   switch (N->getOpcode()) {
2170   default:
2171     return SDValue();
2172   case ISD::VECTOR_SHUFFLE:
2173     return performVECTOR_SHUFFLECombine(N, DCI);
2174   case ISD::SIGN_EXTEND:
2175   case ISD::ZERO_EXTEND:
2176     return performVectorExtendCombine(N, DCI);
2177   case ISD::SINT_TO_FP:
2178   case ISD::UINT_TO_FP:
2179   case ISD::EXTRACT_SUBVECTOR:
2180     return performVectorConvertLowCombine(N, DCI);
2181   case ISD::CONCAT_VECTORS:
2182     return performVectorTruncSatLowCombine(N, DCI);
2183   }
2184 }
2185