1 //=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements the WebAssemblyTargetLowering class.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "WebAssemblyISelLowering.h"
15 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
16 #include "WebAssemblyMachineFunctionInfo.h"
17 #include "WebAssemblySubtarget.h"
18 #include "WebAssemblyTargetMachine.h"
19 #include "WebAssemblyUtilities.h"
20 #include "llvm/CodeGen/Analysis.h"
21 #include "llvm/CodeGen/CallingConvLower.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineJumpTableInfo.h"
24 #include "llvm/CodeGen/MachineModuleInfo.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/SelectionDAG.h"
27 #include "llvm/CodeGen/WasmEHFuncInfo.h"
28 #include "llvm/IR/DiagnosticInfo.h"
29 #include "llvm/IR/DiagnosticPrinter.h"
30 #include "llvm/IR/Function.h"
31 #include "llvm/IR/Intrinsics.h"
32 #include "llvm/IR/IntrinsicsWebAssembly.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/MathExtras.h"
36 #include "llvm/Support/raw_ostream.h"
37 #include "llvm/Target/TargetOptions.h"
38 using namespace llvm;
39 
40 #define DEBUG_TYPE "wasm-lower"
41 
42 WebAssemblyTargetLowering::WebAssemblyTargetLowering(
43     const TargetMachine &TM, const WebAssemblySubtarget &STI)
44     : TargetLowering(TM), Subtarget(&STI) {
45   auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;
46 
47   // Booleans always contain 0 or 1.
48   setBooleanContents(ZeroOrOneBooleanContent);
49   // Except in SIMD vectors, where booleans are all-zeros or all-ones lanes.
50   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
51   // We don't know the microarchitecture here, so just reduce register pressure.
52   setSchedulingPreference(Sched::RegPressure);
53   // Tell ISel that we have a stack pointer.
54   setStackPointerRegisterToSaveRestore(
55       Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
56   // Set up the register classes.
57   addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
58   addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
59   addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
60   addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
61   if (Subtarget->hasSIMD128()) {
62     addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
63     addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
64     addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
65     addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
66     addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
67     addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
68   }
69   // Compute derived properties from the register classes.
70   computeRegisterProperties(Subtarget->getRegisterInfo());
71 
72   setOperationAction(ISD::GlobalAddress, MVTPtr, Custom);
73   setOperationAction(ISD::GlobalTLSAddress, MVTPtr, Custom);
74   setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom);
75   setOperationAction(ISD::JumpTable, MVTPtr, Custom);
76   setOperationAction(ISD::BlockAddress, MVTPtr, Custom);
77   setOperationAction(ISD::BRIND, MVT::Other, Custom);
78 
79   // Take the default expansion for va_arg, va_copy, and va_end. There is no
80   // default action for va_start, so we handle it with custom lowering.
81   setOperationAction(ISD::VASTART, MVT::Other, Custom);
82   setOperationAction(ISD::VAARG, MVT::Other, Expand);
83   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
84   setOperationAction(ISD::VAEND, MVT::Other, Expand);
85 
86   for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
87     // Don't expand the floating-point types to constant pools.
88     setOperationAction(ISD::ConstantFP, T, Legal);
89     // Expand floating-point comparisons.
90     for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
91                     ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
92       setCondCodeAction(CC, T, Expand);
93     // Expand floating-point library function operators.
94     for (auto Op :
95          {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FMA})
96       setOperationAction(Op, T, Expand);
97     // Mark floating-point library function operators that we do support as
98     // Legal; they otherwise default to Expand.
99     for (auto Op :
100          {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT})
101       setOperationAction(Op, T, Legal);
102     // Support minimum and maximum, which otherwise default to expand.
103     setOperationAction(ISD::FMINIMUM, T, Legal);
104     setOperationAction(ISD::FMAXIMUM, T, Legal);
105     // WebAssembly currently has no builtin f16 support.
106     setOperationAction(ISD::FP16_TO_FP, T, Expand);
107     setOperationAction(ISD::FP_TO_FP16, T, Expand);
108     setLoadExtAction(ISD::EXTLOAD, T, MVT::f16, Expand);
109     setTruncStoreAction(T, MVT::f16, Expand);
110   }
111 
112   // Expand unavailable integer operations.
113   for (auto Op :
114        {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU,
115         ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS, ISD::SRA_PARTS,
116         ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}) {
117     for (auto T : {MVT::i32, MVT::i64})
118       setOperationAction(Op, T, Expand);
119     if (Subtarget->hasSIMD128())
120       for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
121         setOperationAction(Op, T, Expand);
122   }
123 
124   // SIMD-specific configuration
125   if (Subtarget->hasSIMD128()) {
126     // Hoist bitcasts out of shuffles
127     setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
128 
129     // Combine extends of extract_subvectors into widening ops
130     setTargetDAGCombine(ISD::SIGN_EXTEND);
131     setTargetDAGCombine(ISD::ZERO_EXTEND);
132 
133     // Support saturating add for i8x16 and i16x8
134     for (auto Op : {ISD::SADDSAT, ISD::UADDSAT})
135       for (auto T : {MVT::v16i8, MVT::v8i16})
136         setOperationAction(Op, T, Legal);
137 
138     // Support integer abs
139     for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
140       setOperationAction(ISD::ABS, T, Legal);
141 
142     // Custom lower BUILD_VECTORs to minimize the number of replace_lane ops
143     for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
144                    MVT::v2f64})
145       setOperationAction(ISD::BUILD_VECTOR, T, Custom);
146 
147     // We have custom shuffle lowering to expose the shuffle mask
148     for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
149                    MVT::v2f64})
150       setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);
151 
152     // Custom lowering since wasm shifts must have a scalar shift amount
153     for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
154       for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
155         setOperationAction(Op, T, Custom);
156 
157     // Custom lower lane accesses to expand out variable indices
158     for (auto Op : {ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT})
159       for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
160                      MVT::v2f64})
161         setOperationAction(Op, T, Custom);
162 
163     // There is no i8x16.mul instruction
164     setOperationAction(ISD::MUL, MVT::v16i8, Expand);
165 
166     // There is no vector conditional select instruction
167     for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
168                    MVT::v2f64})
169       setOperationAction(ISD::SELECT_CC, T, Expand);
170 
171     // Expand integer operations supported for scalars but not SIMD
172     for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP, ISD::SDIV, ISD::UDIV,
173                     ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR})
174       for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
175         setOperationAction(Op, T, Expand);
176 
177     // But we do have integer min and max operations
178     for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
179       for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
180         setOperationAction(Op, T, Legal);
181 
182     // Expand float operations supported for scalars but not SIMD
183     for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT,
184                     ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
185                     ISD::FEXP, ISD::FEXP2, ISD::FRINT})
186       for (auto T : {MVT::v4f32, MVT::v2f64})
187         setOperationAction(Op, T, Expand);
188 
189     // Custom lower comparisons for i64x2 vectors, which are not supported natively
190     for (unsigned CC = 0; CC < ISD::SETCC_INVALID; ++CC)
191       setCondCodeAction(static_cast<ISD::CondCode>(CC), MVT::v2i64, Custom);
192 
193     // 64x2 conversions are not in the spec
194     for (auto Op :
195          {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT})
196       for (auto T : {MVT::v2i64, MVT::v2f64})
197         setOperationAction(Op, T, Expand);
198   }
199 
200   // As a special case, these operators use the type to mean the type to
201   // sign-extend from.
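  // E.g. (sign_extend_inreg x, i8) sign-extends from the low 8 bits of x,
  // regardless of the width of x's own type.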
202   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
203   if (!Subtarget->hasSignExt()) {
204     // Sign extends are legal only when extending a vector extract
205     auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
206     for (auto T : {MVT::i8, MVT::i16, MVT::i32})
207       setOperationAction(ISD::SIGN_EXTEND_INREG, T, Action);
208   }
209   for (auto T : MVT::integer_fixedlen_vector_valuetypes())
210     setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand);
211 
212   // Dynamic stack allocation: use the default expansion.
213   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
214   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
215   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand);
216 
217   setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
218   setOperationAction(ISD::FrameIndex, MVT::i64, Custom);
219   setOperationAction(ISD::CopyToReg, MVT::Other, Custom);
220 
221   // Expand these forms; we pattern-match the forms that we can handle in isel.
222   for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
223     for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
224       setOperationAction(Op, T, Expand);
225 
226   // We have custom switch handling.
227   setOperationAction(ISD::BR_JT, MVT::Other, Custom);
228 
229   // WebAssembly doesn't have:
230   //  - Floating-point extending loads.
231   //  - Floating-point truncating stores.
232   //  - i1 extending loads.
233   //  - truncating SIMD stores and most extending loads
234   setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
235   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
236   for (auto T : MVT::integer_valuetypes())
237     for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
238       setLoadExtAction(Ext, T, MVT::i1, Promote);
239   if (Subtarget->hasSIMD128()) {
240     for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
241                    MVT::v2f64}) {
242       for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
243         if (MVT(T) != MemT) {
244           setTruncStoreAction(T, MemT, Expand);
245           for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
246             setLoadExtAction(Ext, T, MemT, Expand);
247         }
248       }
249     }
250     // But some vector extending loads are legal
251     for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
252       setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
253       setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
254       setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
255     }
256     // And some truncating stores are legal as well
257     setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
258     setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
259   }
260 
261   // Don't do anything clever with build_pairs
262   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
263 
264   // Trap lowers to wasm unreachable
265   setOperationAction(ISD::TRAP, MVT::Other, Legal);
266   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
267 
268   // Exception handling intrinsics
269   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
270   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
271   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
272 
273   setMaxAtomicSizeInBitsSupported(64);
274 
275   // Override the __gnu_f2h_ieee/__gnu_h2f_ieee names so that the f32 name is
276   // consistent with the f64 and f128 names.
277   setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
278   setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
279 
280   // Define the emscripten name for return address helper.
281   // TODO: when implementing other Wasm backends, make this generic or only do
282   // this on emscripten depending on what they end up doing.
283   setLibcallName(RTLIB::RETURN_ADDRESS, "emscripten_return_address");
284 
285   // Always convert switches to br_tables unless there is only one case, which
286   // is equivalent to a simple branch. This reduces code size for wasm, and we
287   // defer possible jump table optimizations to the VM.
288   setMinimumJumpTableEntries(2);
289 }
290 
291 TargetLowering::AtomicExpansionKind
292 WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
293   // We have wasm instructions for these
294   switch (AI->getOperation()) {
295   case AtomicRMWInst::Add:
296   case AtomicRMWInst::Sub:
297   case AtomicRMWInst::And:
298   case AtomicRMWInst::Or:
299   case AtomicRMWInst::Xor:
300   case AtomicRMWInst::Xchg:
301     return AtomicExpansionKind::None;
302   default:
303     break;
304   }
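  // Everything else (e.g. nand, min, max) is expanded to a cmpxchg loop by
  // AtomicExpandPass.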
305   return AtomicExpansionKind::CmpXChg;
306 }
307 
308 FastISel *WebAssemblyTargetLowering::createFastISel(
309     FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
310   return WebAssembly::createFastISel(FuncInfo, LibInfo);
311 }
312 
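// The shift-amount type matches the shifted value type up to 64 bits, since
// wasm shift instructions take a count of the same width as their operand.
// Wider shifts (e.g. i128) are lowered to libcalls, whose count parameter is
// an i32.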
313 MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
314                                                       EVT VT) const {
315   unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
316   if (BitWidth > 1 && BitWidth < 8)
317     BitWidth = 8;
318 
319   if (BitWidth > 64) {
320     // The shift will be lowered to a libcall, and compiler-rt libcalls expect
321     // the count to be an i32.
322     BitWidth = 32;
323     assert(BitWidth >= Log2_32_Ceil(VT.getSizeInBits()) &&
324            "32-bit shift counts ought to be enough for anyone");
325   }
326 
327   MVT Result = MVT::getIntegerVT(BitWidth);
328   assert(Result != MVT::INVALID_SIMPLE_VALUE_TYPE &&
329          "Unable to represent scalar shift amount type");
330   return Result;
331 }
332 
333 // Lower an fp-to-int conversion operator from the LLVM opcode, which has an
334 // undefined result on invalid/overflow, to the WebAssembly opcode, which
335 // traps on invalid/overflow.
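// The emitted code is a small CFG diamond, roughly (illustrative sketch):
//
//   BB:       range-check the input; br_if (out of range) TrueMBB
//   FalseMBB: result = trapping wasm truncation of the input; br DoneMBB
//   TrueMBB:  result = substitute constant (0 if unsigned, INT_MIN if signed)
//   DoneMBB:  OutReg = phi(FalseMBB result, TrueMBB result)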
336 static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
337                                        MachineBasicBlock *BB,
338                                        const TargetInstrInfo &TII,
339                                        bool IsUnsigned, bool Int64,
340                                        bool Float64, unsigned LoweredOpcode) {
341   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
342 
343   Register OutReg = MI.getOperand(0).getReg();
344   Register InReg = MI.getOperand(1).getReg();
345 
346   unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
347   unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
348   unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
349   unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
350   unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
351   unsigned Eqz = WebAssembly::EQZ_I32;
352   unsigned And = WebAssembly::AND_I32;
353   int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
354   int64_t Substitute = IsUnsigned ? 0 : Limit;
355   double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
356   auto &Context = BB->getParent()->getFunction().getContext();
357   Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);
358 
359   const BasicBlock *LLVMBB = BB->getBasicBlock();
360   MachineFunction *F = BB->getParent();
361   MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
362   MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB);
363   MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
364 
365   MachineFunction::iterator It = ++BB->getIterator();
366   F->insert(It, FalseMBB);
367   F->insert(It, TrueMBB);
368   F->insert(It, DoneMBB);
369 
370   // Transfer the remainder of BB and its successor edges to DoneMBB.
371   DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
372   DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
373 
374   BB->addSuccessor(TrueMBB);
375   BB->addSuccessor(FalseMBB);
376   TrueMBB->addSuccessor(DoneMBB);
377   FalseMBB->addSuccessor(DoneMBB);
378 
379   unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
380   Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
381   Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
382   CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
383   EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
384   FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
385   TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
386 
387   MI.eraseFromParent();
388   // For signed numbers, we can do a single comparison to determine whether
389   // fabs(x) is within range.
390   if (IsUnsigned) {
391     Tmp0 = InReg;
392   } else {
393     BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
394   }
395   BuildMI(BB, DL, TII.get(FConst), Tmp1)
396       .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
397   BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);
398 
399   // For unsigned numbers, we have to do a separate comparison with zero.
400   if (IsUnsigned) {
401     Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
402     Register SecondCmpReg =
403         MRI.createVirtualRegister(&WebAssembly::I32RegClass);
404     Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
405     BuildMI(BB, DL, TII.get(FConst), Tmp1)
406         .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
407     BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
408     BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
409     CmpReg = AndReg;
410   }
411 
412   BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);
413 
414   // Create the CFG diamond to select between doing the conversion or using
415   // the substitute value.
416   BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
417   BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
418   BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
419   BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
420   BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
421       .addReg(FalseReg)
422       .addMBB(FalseMBB)
423       .addReg(TrueReg)
424       .addMBB(TrueMBB);
425 
426   return DoneMBB;
427 }
428 
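// Rewrite the CALL_PARAMS / CALL_RESULTS pseudo-instruction pair produced by
// instruction selection into a single CALL, CALL_INDIRECT, RET_CALL, or
// RET_CALL_INDIRECT machine instruction, with the result defs followed by the
// argument uses (plus, for indirect calls, the type index and table operands).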
429 static MachineBasicBlock *LowerCallResults(MachineInstr &CallResults,
430                                            DebugLoc DL, MachineBasicBlock *BB,
431                                            const TargetInstrInfo &TII) {
432   MachineInstr &CallParams = *CallResults.getPrevNode();
433   assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS);
434   assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS ||
435          CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS);
436 
437   bool IsIndirect = CallParams.getOperand(0).isReg();
438   bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS;
439 
440   unsigned CallOp;
441   if (IsIndirect && IsRetCall) {
442     CallOp = WebAssembly::RET_CALL_INDIRECT;
443   } else if (IsIndirect) {
444     CallOp = WebAssembly::CALL_INDIRECT;
445   } else if (IsRetCall) {
446     CallOp = WebAssembly::RET_CALL;
447   } else {
448     CallOp = WebAssembly::CALL;
449   }
450 
451   MachineFunction &MF = *BB->getParent();
452   const MCInstrDesc &MCID = TII.get(CallOp);
453   MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));
454 
455   // See if we must truncate the function pointer.
456   // CALL_INDIRECT takes an i32, but in wasm64 we represent function pointers
457   // as 64-bit for uniformity with other pointer types.
458   if (IsIndirect && MF.getSubtarget<WebAssemblySubtarget>().hasAddr64()) {
459     Register Reg32 =
460         MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
461     auto &FnPtr = CallParams.getOperand(0);
462     BuildMI(*BB, CallResults.getIterator(), DL,
463             TII.get(WebAssembly::I32_WRAP_I64), Reg32)
464         .addReg(FnPtr.getReg());
465     FnPtr.setReg(Reg32);
466   }
467 
468   // Move the function pointer to the end of the arguments for indirect calls
469   if (IsIndirect) {
470     auto FnPtr = CallParams.getOperand(0);
471     CallParams.RemoveOperand(0);
472     CallParams.addOperand(FnPtr);
473   }
474 
475   for (auto Def : CallResults.defs())
476     MIB.add(Def);
477 
478   if (IsIndirect) {
479     // Placeholder for the type index.
480     MIB.addImm(0);
481     // The table into which this call_indirect indexes.
482     MIB.addSym(WebAssembly::getOrCreateFunctionTableSymbol(
483         MF.getContext(), "__indirect_function_table"));
484   }
485 
486   for (auto Use : CallParams.uses())
487     MIB.add(Use);
488 
489   BB->insert(CallResults.getIterator(), MIB);
490   CallParams.eraseFromParent();
491   CallResults.eraseFromParent();
492 
493   return BB;
494 }
495 
496 MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
497     MachineInstr &MI, MachineBasicBlock *BB) const {
498   const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
499   DebugLoc DL = MI.getDebugLoc();
500 
501   switch (MI.getOpcode()) {
502   default:
503     llvm_unreachable("Unexpected instr type to insert");
504   case WebAssembly::FP_TO_SINT_I32_F32:
505     return LowerFPToInt(MI, DL, BB, TII, false, false, false,
506                         WebAssembly::I32_TRUNC_S_F32);
507   case WebAssembly::FP_TO_UINT_I32_F32:
508     return LowerFPToInt(MI, DL, BB, TII, true, false, false,
509                         WebAssembly::I32_TRUNC_U_F32);
510   case WebAssembly::FP_TO_SINT_I64_F32:
511     return LowerFPToInt(MI, DL, BB, TII, false, true, false,
512                         WebAssembly::I64_TRUNC_S_F32);
513   case WebAssembly::FP_TO_UINT_I64_F32:
514     return LowerFPToInt(MI, DL, BB, TII, true, true, false,
515                         WebAssembly::I64_TRUNC_U_F32);
516   case WebAssembly::FP_TO_SINT_I32_F64:
517     return LowerFPToInt(MI, DL, BB, TII, false, false, true,
518                         WebAssembly::I32_TRUNC_S_F64);
519   case WebAssembly::FP_TO_UINT_I32_F64:
520     return LowerFPToInt(MI, DL, BB, TII, true, false, true,
521                         WebAssembly::I32_TRUNC_U_F64);
522   case WebAssembly::FP_TO_SINT_I64_F64:
523     return LowerFPToInt(MI, DL, BB, TII, false, true, true,
524                         WebAssembly::I64_TRUNC_S_F64);
525   case WebAssembly::FP_TO_UINT_I64_F64:
526     return LowerFPToInt(MI, DL, BB, TII, true, true, true,
527                         WebAssembly::I64_TRUNC_U_F64);
528   case WebAssembly::CALL_RESULTS:
529   case WebAssembly::RET_CALL_RESULTS:
530     return LowerCallResults(MI, DL, BB, TII);
531   }
532 }
533 
534 const char *
535 WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const {
536   switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) {
537   case WebAssemblyISD::FIRST_NUMBER:
538   case WebAssemblyISD::FIRST_MEM_OPCODE:
539     break;
540 #define HANDLE_NODETYPE(NODE)                                                  \
541   case WebAssemblyISD::NODE:                                                   \
542     return "WebAssemblyISD::" #NODE;
543 #define HANDLE_MEM_NODETYPE(NODE) HANDLE_NODETYPE(NODE)
544 #include "WebAssemblyISD.def"
545 #undef HANDLE_MEM_NODETYPE
546 #undef HANDLE_NODETYPE
547   }
548   return nullptr;
549 }
550 
551 std::pair<unsigned, const TargetRegisterClass *>
552 WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
553     const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
554   // First, see if this is a constraint that directly corresponds to a
555   // WebAssembly register class.
556   if (Constraint.size() == 1) {
557     switch (Constraint[0]) {
558     case 'r':
559       assert(VT != MVT::iPTR && "Pointer MVT not expected here");
560       if (Subtarget->hasSIMD128() && VT.isVector()) {
561         if (VT.getSizeInBits() == 128)
562           return std::make_pair(0U, &WebAssembly::V128RegClass);
563       }
564       if (VT.isInteger() && !VT.isVector()) {
565         if (VT.getSizeInBits() <= 32)
566           return std::make_pair(0U, &WebAssembly::I32RegClass);
567         if (VT.getSizeInBits() <= 64)
568           return std::make_pair(0U, &WebAssembly::I64RegClass);
569       }
570       if (VT.isFloatingPoint() && !VT.isVector()) {
571         switch (VT.getSizeInBits()) {
572         case 32:
573           return std::make_pair(0U, &WebAssembly::F32RegClass);
574         case 64:
575           return std::make_pair(0U, &WebAssembly::F64RegClass);
576         default:
577           break;
578         }
579       }
580       break;
581     default:
582       break;
583     }
584   }
585 
586   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
587 }
588 
589 bool WebAssemblyTargetLowering::isCheapToSpeculateCttz() const {
590   // Assume ctz is a relatively cheap operation.
591   return true;
592 }
593 
594 bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const {
595   // Assume clz is a relatively cheap operation.
596   return true;
597 }
598 
599 bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
600                                                       const AddrMode &AM,
601                                                       Type *Ty, unsigned AS,
602                                                       Instruction *I) const {
603   // WebAssembly offsets are added as unsigned without wrapping. The
604   // isLegalAddressingMode hook gives us no way to determine whether wrapping
605   // could happen, so we approximate this by accepting only non-negative offsets.
606   if (AM.BaseOffs < 0)
607     return false;
608 
609   // WebAssembly has no scale register operands.
610   if (AM.Scale != 0)
611     return false;
612 
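  // So, for example, "reg + 16" is accepted as an addressing mode, while
  // "reg - 16" and scaled forms like "reg + 4 * idx" are not.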
613   // Everything else is legal.
614   return true;
615 }
616 
617 bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
618     EVT /*VT*/, unsigned /*AddrSpace*/, unsigned /*Align*/,
619     MachineMemOperand::Flags /*Flags*/, bool *Fast) const {
620   // WebAssembly supports unaligned accesses, though loads and stores that
621   // perform them should declare it with the p2align attribute, and there may
622   // be a performance impact. We tell LLVM they're "fast" because, for the
623   // kinds of things LLVM uses this for (merging adjacent stores of constants,
624   // etc.), WebAssembly implementations will either want the unaligned access
625   // or they'll split it anyway.
626   if (Fast)
627     *Fast = true;
628   return true;
629 }
630 
631 bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
632                                               AttributeList Attr) const {
633   // The current thinking is that wasm engines will perform this optimization,
634   // so we can save on code size.
635   return true;
636 }
637 
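// Only the extending-load shapes that map onto the SIMD widening load
// instructions (load8x8, load16x4, load32x2) are reported as desirable.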
638 bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
639   EVT ExtT = ExtVal.getValueType();
640   EVT MemT = cast<LoadSDNode>(ExtVal->getOperand(0))->getValueType(0);
641   return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
642          (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
643          (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
644 }
645 
646 EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
647                                                   LLVMContext &C,
648                                                   EVT VT) const {
649   if (VT.isVector())
650     return VT.changeVectorElementTypeToInteger();
651 
652   // So far, all branch instructions in Wasm take an I32 condition.
653   // The default TargetLowering::getSetCCResultType returns the pointer size,
654   // which would be useful for reducing instruction counts when testing
655   // against 64-bit pointers/values, if Wasm ever supports that.
656   return EVT::getIntegerVT(C, 32);
657 }
658 
659 bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
660                                                    const CallInst &I,
661                                                    MachineFunction &MF,
662                                                    unsigned Intrinsic) const {
663   switch (Intrinsic) {
664   case Intrinsic::wasm_memory_atomic_notify:
665     Info.opc = ISD::INTRINSIC_W_CHAIN;
666     Info.memVT = MVT::i32;
667     Info.ptrVal = I.getArgOperand(0);
668     Info.offset = 0;
669     Info.align = Align(4);
670     // The atomic.notify instruction does not really load from the memory
671     // specified by this argument, but a MachineMemOperand must be either a
672     // load or a store, so we mark this as a load.
673     // FIXME Volatile isn't really correct, but currently all LLVM atomic
674     // instructions are treated as volatiles in the backend, so we should be
675     // consistent. The same applies for wasm_atomic_wait intrinsics too.
676     Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
677     return true;
678   case Intrinsic::wasm_memory_atomic_wait32:
679     Info.opc = ISD::INTRINSIC_W_CHAIN;
680     Info.memVT = MVT::i32;
681     Info.ptrVal = I.getArgOperand(0);
682     Info.offset = 0;
683     Info.align = Align(4);
684     Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
685     return true;
686   case Intrinsic::wasm_memory_atomic_wait64:
687     Info.opc = ISD::INTRINSIC_W_CHAIN;
688     Info.memVT = MVT::i64;
689     Info.ptrVal = I.getArgOperand(0);
690     Info.offset = 0;
691     Info.align = Align(8);
692     Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
693     return true;
694   case Intrinsic::wasm_load32_zero:
695   case Intrinsic::wasm_load64_zero:
696     Info.opc = ISD::INTRINSIC_W_CHAIN;
697     Info.memVT = Intrinsic == Intrinsic::wasm_load32_zero ? MVT::i32 : MVT::i64;
698     Info.ptrVal = I.getArgOperand(0);
699     Info.offset = 0;
700     Info.align = Info.memVT == MVT::i32 ? Align(4) : Align(8);
701     Info.flags = MachineMemOperand::MOLoad;
702     return true;
703   case Intrinsic::wasm_load8_lane:
704   case Intrinsic::wasm_load16_lane:
705   case Intrinsic::wasm_load32_lane:
706   case Intrinsic::wasm_load64_lane:
707   case Intrinsic::wasm_store8_lane:
708   case Intrinsic::wasm_store16_lane:
709   case Intrinsic::wasm_store32_lane:
710   case Intrinsic::wasm_store64_lane: {
711     MVT MemVT;
712     Align MemAlign;
713     switch (Intrinsic) {
714     case Intrinsic::wasm_load8_lane:
715     case Intrinsic::wasm_store8_lane:
716       MemVT = MVT::i8;
717       MemAlign = Align(1);
718       break;
719     case Intrinsic::wasm_load16_lane:
720     case Intrinsic::wasm_store16_lane:
721       MemVT = MVT::i16;
722       MemAlign = Align(2);
723       break;
724     case Intrinsic::wasm_load32_lane:
725     case Intrinsic::wasm_store32_lane:
726       MemVT = MVT::i32;
727       MemAlign = Align(4);
728       break;
729     case Intrinsic::wasm_load64_lane:
730     case Intrinsic::wasm_store64_lane:
731       MemVT = MVT::i64;
732       MemAlign = Align(8);
733       break;
734     default:
735       llvm_unreachable("unexpected intrinsic");
736     }
737     if (Intrinsic == Intrinsic::wasm_load8_lane ||
738         Intrinsic == Intrinsic::wasm_load16_lane ||
739         Intrinsic == Intrinsic::wasm_load32_lane ||
740         Intrinsic == Intrinsic::wasm_load64_lane) {
741       Info.opc = ISD::INTRINSIC_W_CHAIN;
742       Info.flags = MachineMemOperand::MOLoad;
743     } else {
744       Info.opc = ISD::INTRINSIC_VOID;
745       Info.flags = MachineMemOperand::MOStore;
746     }
747     Info.ptrVal = I.getArgOperand(0);
748     Info.memVT = MemVT;
749     Info.offset = 0;
750     Info.align = MemAlign;
751     return true;
752   }
753   case Intrinsic::wasm_prefetch_t:
754   case Intrinsic::wasm_prefetch_nt: {
755     Info.opc = ISD::INTRINSIC_VOID;
756     Info.memVT = MVT::i8;
757     Info.ptrVal = I.getArgOperand(0);
758     Info.offset = 0;
759     Info.align = Align(1);
760     Info.flags = MachineMemOperand::MOLoad;
761     return true;
762   }
763   default:
764     return false;
765   }
766 }
767 
768 //===----------------------------------------------------------------------===//
769 // WebAssembly Lowering private implementation.
770 //===----------------------------------------------------------------------===//
771 
772 //===----------------------------------------------------------------------===//
773 // Lowering Code
774 //===----------------------------------------------------------------------===//
775 
776 static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
777   MachineFunction &MF = DAG.getMachineFunction();
778   DAG.getContext()->diagnose(
779       DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
780 }
781 
782 // Test whether the given calling convention is supported.
783 static bool callingConvSupported(CallingConv::ID CallConv) {
784   // We currently support the language-independent, target-independent
785   // conventions. We don't yet have a way to annotate calls with properties like
786   // "cold", and we don't have any call-clobbered registers, so these are mostly
787   // all handled the same.
788   return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
789          CallConv == CallingConv::Cold ||
790          CallConv == CallingConv::PreserveMost ||
791          CallConv == CallingConv::PreserveAll ||
792          CallConv == CallingConv::CXX_FAST_TLS ||
793          CallConv == CallingConv::WASM_EmscriptenInvoke ||
794          CallConv == CallingConv::Swift;
795 }
796 
797 SDValue
798 WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
799                                      SmallVectorImpl<SDValue> &InVals) const {
800   SelectionDAG &DAG = CLI.DAG;
801   SDLoc DL = CLI.DL;
802   SDValue Chain = CLI.Chain;
803   SDValue Callee = CLI.Callee;
804   MachineFunction &MF = DAG.getMachineFunction();
805   auto Layout = MF.getDataLayout();
806 
807   CallingConv::ID CallConv = CLI.CallConv;
808   if (!callingConvSupported(CallConv))
809     fail(DL, DAG,
810          "WebAssembly doesn't support language-specific or target-specific "
811          "calling conventions yet");
812   if (CLI.IsPatchPoint)
813     fail(DL, DAG, "WebAssembly doesn't support patch point yet");
814 
815   if (CLI.IsTailCall) {
816     auto NoTail = [&](const char *Msg) {
817       if (CLI.CB && CLI.CB->isMustTailCall())
818         fail(DL, DAG, Msg);
819       CLI.IsTailCall = false;
820     };
821 
822     if (!Subtarget->hasTailCall())
823       NoTail("WebAssembly 'tail-call' feature not enabled");
824 
825     // Varargs calls cannot be tail calls because the buffer is on the stack
826     if (CLI.IsVarArg)
827       NoTail("WebAssembly does not support varargs tail calls");
828 
829     // Do not tail call unless caller and callee return types match
830     const Function &F = MF.getFunction();
831     const TargetMachine &TM = getTargetMachine();
832     Type *RetTy = F.getReturnType();
833     SmallVector<MVT, 4> CallerRetTys;
834     SmallVector<MVT, 4> CalleeRetTys;
835     computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
836     computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
837     bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
838                       std::equal(CallerRetTys.begin(), CallerRetTys.end(),
839                                  CalleeRetTys.begin());
840     if (!TypesMatch)
841       NoTail("WebAssembly tail call requires caller and callee return types to "
842              "match");
843 
844     // If pointers to local stack values are passed, we cannot tail call
845     if (CLI.CB) {
846       for (auto &Arg : CLI.CB->args()) {
847         Value *Val = Arg.get();
848         // Trace the value back through pointer operations
849         while (true) {
850           Value *Src = Val->stripPointerCastsAndAliases();
851           if (auto *GEP = dyn_cast<GetElementPtrInst>(Src))
852             Src = GEP->getPointerOperand();
853           if (Val == Src)
854             break;
855           Val = Src;
856         }
857         if (isa<AllocaInst>(Val)) {
858           NoTail(
859               "WebAssembly does not support tail calling with stack arguments");
860           break;
861         }
862       }
863     }
864   }
865 
866   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
867   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
868   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
869 
870   // The generic code may have added an sret argument. If we're lowering an
871   // invoke function, the ABI requires that the function pointer be the first
872   // argument, so we may have to swap the arguments.
873   if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
874       Outs[0].Flags.isSRet()) {
875     std::swap(Outs[0], Outs[1]);
876     std::swap(OutVals[0], OutVals[1]);
877   }
878 
879   bool HasSwiftSelfArg = false;
880   bool HasSwiftErrorArg = false;
881   unsigned NumFixedArgs = 0;
882   for (unsigned I = 0; I < Outs.size(); ++I) {
883     const ISD::OutputArg &Out = Outs[I];
884     SDValue &OutVal = OutVals[I];
885     HasSwiftSelfArg |= Out.Flags.isSwiftSelf();
886     HasSwiftErrorArg |= Out.Flags.isSwiftError();
887     if (Out.Flags.isNest())
888       fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
889     if (Out.Flags.isInAlloca())
890       fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
891     if (Out.Flags.isInConsecutiveRegs())
892       fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
893     if (Out.Flags.isInConsecutiveRegsLast())
894       fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
895     if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
896       auto &MFI = MF.getFrameInfo();
897       int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
898                                      Out.Flags.getNonZeroByValAlign(),
899                                      /*isSS=*/false);
900       SDValue SizeNode =
901           DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
902       SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
903       Chain = DAG.getMemcpy(
904           Chain, DL, FINode, OutVal, SizeNode, Out.Flags.getNonZeroByValAlign(),
905           /*isVolatile*/ false, /*AlwaysInline=*/false,
906           /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
907       OutVal = FINode;
908     }
909     // Count the number of fixed args *after* legalization.
910     NumFixedArgs += Out.IsFixed;
911   }
912 
913   bool IsVarArg = CLI.IsVarArg;
914   auto PtrVT = getPointerTy(Layout);
915 
916   // For swiftcc, emit additional swiftself and swifterror arguments if they
917   // aren't already present. These additional arguments are also added to the
918   // callee signature; they are necessary to match the callee and caller
919   // signatures for indirect calls.
920   if (CallConv == CallingConv::Swift) {
921     if (!HasSwiftSelfArg) {
922       NumFixedArgs++;
923       ISD::OutputArg Arg;
924       Arg.Flags.setSwiftSelf();
925       CLI.Outs.push_back(Arg);
926       SDValue ArgVal = DAG.getUNDEF(PtrVT);
927       CLI.OutVals.push_back(ArgVal);
928     }
929     if (!HasSwiftErrorArg) {
930       NumFixedArgs++;
931       ISD::OutputArg Arg;
932       Arg.Flags.setSwiftError();
933       CLI.Outs.push_back(Arg);
934       SDValue ArgVal = DAG.getUNDEF(PtrVT);
935       CLI.OutVals.push_back(ArgVal);
936     }
937   }
938 
939   // Analyze operands of the call, assigning locations to each operand.
940   SmallVector<CCValAssign, 16> ArgLocs;
941   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
942 
943   if (IsVarArg) {
944     // Outgoing non-fixed arguments are placed in a buffer. First
945     // compute their offsets and the total amount of buffer space needed.
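    // For example, for a call like printf("%d %f", i, d) the two non-fixed
    // arguments would get offsets 0 (i32) and 8 (f64, 8-byte aligned), for a
    // 16-byte buffer.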
946     for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
947       const ISD::OutputArg &Out = Outs[I];
948       SDValue &Arg = OutVals[I];
949       EVT VT = Arg.getValueType();
950       assert(VT != MVT::iPTR && "Legalized args should be concrete");
951       Type *Ty = VT.getTypeForEVT(*DAG.getContext());
952       Align Alignment =
953           std::max(Out.Flags.getNonZeroOrigAlign(), Layout.getABITypeAlign(Ty));
954       unsigned Offset =
955           CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), Alignment);
956       CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
957                                         Offset, VT.getSimpleVT(),
958                                         CCValAssign::Full));
959     }
960   }
961 
962   unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
963 
964   SDValue FINode;
965   if (IsVarArg && NumBytes) {
966     // For non-fixed arguments, next emit stores to store the argument values
967     // to the stack buffer at the offsets computed above.
968     int FI = MF.getFrameInfo().CreateStackObject(NumBytes,
969                                                  Layout.getStackAlignment(),
970                                                  /*isSS=*/false);
971     unsigned ValNo = 0;
972     SmallVector<SDValue, 8> Chains;
973     for (SDValue Arg : drop_begin(OutVals, NumFixedArgs)) {
974       assert(ArgLocs[ValNo].getValNo() == ValNo &&
975              "ArgLocs should remain in order and only hold varargs args");
976       unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
977       FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
978       SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
979                                 DAG.getConstant(Offset, DL, PtrVT));
980       Chains.push_back(
981           DAG.getStore(Chain, DL, Arg, Add,
982                        MachinePointerInfo::getFixedStack(MF, FI, Offset)));
983     }
984     if (!Chains.empty())
985       Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
986   } else if (IsVarArg) {
987     FINode = DAG.getIntPtrConstant(0, DL);
988   }
989 
990   if (Callee->getOpcode() == ISD::GlobalAddress) {
991     // If the callee is a GlobalAddress node (quite common, since every
992     // direct call is), turn it into a TargetGlobalAddress node so that
993     // LowerGlobalAddress doesn't add MO_GOT, which direct calls don't need.
994     GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Callee);
995     Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
996                                         getPointerTy(DAG.getDataLayout()),
997                                         GA->getOffset());
998     Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
999                          getPointerTy(DAG.getDataLayout()), Callee);
1000   }
1001 
1002   // Compute the operands for the CALLn node.
1003   SmallVector<SDValue, 16> Ops;
1004   Ops.push_back(Chain);
1005   Ops.push_back(Callee);
1006 
1007   // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
1008   // isn't reliable.
1009   Ops.append(OutVals.begin(),
1010              IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
1011   // Add a pointer to the vararg buffer.
1012   if (IsVarArg)
1013     Ops.push_back(FINode);
1014 
1015   SmallVector<EVT, 8> InTys;
1016   for (const auto &In : Ins) {
1017     assert(!In.Flags.isByVal() && "byval is not valid for return values");
1018     assert(!In.Flags.isNest() && "nest is not valid for return values");
1019     if (In.Flags.isInAlloca())
1020       fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
1021     if (In.Flags.isInConsecutiveRegs())
1022       fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
1023     if (In.Flags.isInConsecutiveRegsLast())
1024       fail(DL, DAG,
1025            "WebAssembly hasn't implemented cons regs last return values");
1026     // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1027     // registers.
1028     InTys.push_back(In.VT);
1029   }
1030 
1031   if (CLI.IsTailCall) {
1032     // ret_calls do not return values to the current frame
1033     SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1034     return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
1035   }
1036 
1037   InTys.push_back(MVT::Other);
1038   SDVTList InTyList = DAG.getVTList(InTys);
1039   SDValue Res = DAG.getNode(WebAssemblyISD::CALL, DL, InTyList, Ops);
1040 
1041   for (size_t I = 0; I < Ins.size(); ++I)
1042     InVals.push_back(Res.getValue(I));
1043 
1044   // Return the chain
1045   return Res.getValue(Ins.size());
1046 }
1047 
1048 bool WebAssemblyTargetLowering::CanLowerReturn(
1049     CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
1050     const SmallVectorImpl<ISD::OutputArg> &Outs,
1051     LLVMContext & /*Context*/) const {
1052   // WebAssembly can only handle returning tuples with multivalue enabled
1053   return Subtarget->hasMultivalue() || Outs.size() <= 1;
1054 }
1055 
1056 SDValue WebAssemblyTargetLowering::LowerReturn(
1057     SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
1058     const SmallVectorImpl<ISD::OutputArg> &Outs,
1059     const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
1060     SelectionDAG &DAG) const {
1061   assert((Subtarget->hasMultivalue() || Outs.size() <= 1) &&
1062          "MVP WebAssembly can only return up to one value");
1063   if (!callingConvSupported(CallConv))
1064     fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1065 
1066   SmallVector<SDValue, 4> RetOps(1, Chain);
1067   RetOps.append(OutVals.begin(), OutVals.end());
1068   Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);
1069 
1070   // Record the number and types of the return values.
1071   for (const ISD::OutputArg &Out : Outs) {
1072     assert(!Out.Flags.isByVal() && "byval is not valid for return values");
1073     assert(!Out.Flags.isNest() && "nest is not valid for return values");
1074     assert(Out.IsFixed && "non-fixed return value is not valid");
1075     if (Out.Flags.isInAlloca())
1076       fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
1077     if (Out.Flags.isInConsecutiveRegs())
1078       fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
1079     if (Out.Flags.isInConsecutiveRegsLast())
1080       fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
1081   }
1082 
1083   return Chain;
1084 }
1085 
1086 SDValue WebAssemblyTargetLowering::LowerFormalArguments(
1087     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1088     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1089     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1090   if (!callingConvSupported(CallConv))
1091     fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1092 
1093   MachineFunction &MF = DAG.getMachineFunction();
1094   auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();
1095 
1096   // Set up the incoming ARGUMENTS value, which serves to represent the liveness
1097   // of the incoming values before they're represented by virtual registers.
1098   MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);
1099 
1100   bool HasSwiftErrorArg = false;
1101   bool HasSwiftSelfArg = false;
1102   for (const ISD::InputArg &In : Ins) {
1103     HasSwiftSelfArg |= In.Flags.isSwiftSelf();
1104     HasSwiftErrorArg |= In.Flags.isSwiftError();
1105     if (In.Flags.isInAlloca())
1106       fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
1107     if (In.Flags.isNest())
1108       fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
1109     if (In.Flags.isInConsecutiveRegs())
1110       fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
1111     if (In.Flags.isInConsecutiveRegsLast())
1112       fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
1113     // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1114     // registers.
1115     InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
1116                                            DAG.getTargetConstant(InVals.size(),
1117                                                                  DL, MVT::i32))
1118                              : DAG.getUNDEF(In.VT));
1119 
1120     // Record the number and types of arguments.
1121     MFI->addParam(In.VT);
1122   }
1123 
1124   // For swiftcc, emit additional swiftself and swifterror arguments if they
1125   // aren't already present. These additional arguments are also added to the
1126   // callee signature; they are necessary to match the callee and caller
1127   // signatures for indirect calls.
1128   auto PtrVT = getPointerTy(MF.getDataLayout());
1129   if (CallConv == CallingConv::Swift) {
1130     if (!HasSwiftSelfArg) {
1131       MFI->addParam(PtrVT);
1132     }
1133     if (!HasSwiftErrorArg) {
1134       MFI->addParam(PtrVT);
1135     }
1136   }
1137   // Varargs are copied into a buffer allocated by the caller, and a pointer to
1138   // the buffer is passed as an argument.
1139   if (IsVarArg) {
1140     MVT PtrVT = getPointerTy(MF.getDataLayout());
1141     Register VarargVreg =
1142         MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrVT));
1143     MFI->setVarargBufferVreg(VarargVreg);
1144     Chain = DAG.getCopyToReg(
1145         Chain, DL, VarargVreg,
1146         DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
1147                     DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
1148     MFI->addParam(PtrVT);
1149   }
1150 
1151   // Record the number and types of arguments and results.
1152   SmallVector<MVT, 4> Params;
1153   SmallVector<MVT, 4> Results;
1154   computeSignatureVTs(MF.getFunction().getFunctionType(), &MF.getFunction(),
1155                       MF.getFunction(), DAG.getTarget(), Params, Results);
1156   for (MVT VT : Results)
1157     MFI->addResult(VT);
1158   // TODO: Use signatures in WebAssemblyMachineFunctionInfo too, and unify
1159   // the param logic here with computeSignatureVTs.
1160   assert(MFI->getParams().size() == Params.size() &&
1161          std::equal(MFI->getParams().begin(), MFI->getParams().end(),
1162                     Params.begin()));
1163 
1164   return Chain;
1165 }
1166 
1167 void WebAssemblyTargetLowering::ReplaceNodeResults(
1168     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
1169   switch (N->getOpcode()) {
1170   case ISD::SIGN_EXTEND_INREG:
1171     // Do not add any results, signifying that N should not be custom lowered
1172     // after all. This happens because simd128 turns on custom lowering for
1173     // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
1174     // illegal type.
1175     break;
1176   default:
1177     llvm_unreachable(
1178         "ReplaceNodeResults not implemented for this op for WebAssembly!");
1179   }
1180 }
1181 
1182 //===----------------------------------------------------------------------===//
1183 //  Custom lowering hooks.
1184 //===----------------------------------------------------------------------===//
1185 
1186 SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
1187                                                   SelectionDAG &DAG) const {
1188   SDLoc DL(Op);
1189   switch (Op.getOpcode()) {
1190   default:
1191     llvm_unreachable("unimplemented operation lowering");
1192     return SDValue();
1193   case ISD::FrameIndex:
1194     return LowerFrameIndex(Op, DAG);
1195   case ISD::GlobalAddress:
1196     return LowerGlobalAddress(Op, DAG);
1197   case ISD::GlobalTLSAddress:
1198     return LowerGlobalTLSAddress(Op, DAG);
1199   case ISD::ExternalSymbol:
1200     return LowerExternalSymbol(Op, DAG);
1201   case ISD::JumpTable:
1202     return LowerJumpTable(Op, DAG);
1203   case ISD::BR_JT:
1204     return LowerBR_JT(Op, DAG);
1205   case ISD::VASTART:
1206     return LowerVASTART(Op, DAG);
1207   case ISD::BlockAddress:
1208   case ISD::BRIND:
1209     fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
1210     return SDValue();
1211   case ISD::RETURNADDR:
1212     return LowerRETURNADDR(Op, DAG);
1213   case ISD::FRAMEADDR:
1214     return LowerFRAMEADDR(Op, DAG);
1215   case ISD::CopyToReg:
1216     return LowerCopyToReg(Op, DAG);
1217   case ISD::EXTRACT_VECTOR_ELT:
1218   case ISD::INSERT_VECTOR_ELT:
1219     return LowerAccessVectorElement(Op, DAG);
1220   case ISD::INTRINSIC_VOID:
1221   case ISD::INTRINSIC_WO_CHAIN:
1222   case ISD::INTRINSIC_W_CHAIN:
1223     return LowerIntrinsic(Op, DAG);
1224   case ISD::SIGN_EXTEND_INREG:
1225     return LowerSIGN_EXTEND_INREG(Op, DAG);
1226   case ISD::BUILD_VECTOR:
1227     return LowerBUILD_VECTOR(Op, DAG);
1228   case ISD::VECTOR_SHUFFLE:
1229     return LowerVECTOR_SHUFFLE(Op, DAG);
1230   case ISD::SETCC:
1231     return LowerSETCC(Op, DAG);
1232   case ISD::SHL:
1233   case ISD::SRA:
1234   case ISD::SRL:
1235     return LowerShift(Op, DAG);
1236   }
1237 }
1238 
1239 SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
1240                                                   SelectionDAG &DAG) const {
1241   SDValue Src = Op.getOperand(2);
1242   if (isa<FrameIndexSDNode>(Src.getNode())) {
1243     // CopyToReg nodes don't support FrameIndex operands. Other targets select
1244     // the FI into some LEA-like instruction, but since we don't have that, we
1245     // need to insert some kind of instruction that can take an FI operand and
1246     // produce a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
1247     // local.copy between Op and its FI operand.
1248     SDValue Chain = Op.getOperand(0);
1249     SDLoc DL(Op);
1250     unsigned Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
1251     EVT VT = Src.getValueType();
1252     SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
1253                                                    : WebAssembly::COPY_I64,
1254                                     DL, VT, Src),
1255                  0);
1256     return Op.getNode()->getNumValues() == 1
1257                ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
1258                : DAG.getCopyToReg(Chain, DL, Reg, Copy,
1259                                   Op.getNumOperands() == 4 ? Op.getOperand(3)
1260                                                            : SDValue());
1261   }
1262   return SDValue();
1263 }
1264 
1265 SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
1266                                                    SelectionDAG &DAG) const {
1267   int FI = cast<FrameIndexSDNode>(Op)->getIndex();
1268   return DAG.getTargetFrameIndex(FI, Op.getValueType());
1269 }
1270 
1271 SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
1272                                                    SelectionDAG &DAG) const {
1273   SDLoc DL(Op);
1274 
1275   if (!Subtarget->getTargetTriple().isOSEmscripten()) {
1276     fail(DL, DAG,
1277          "Non-Emscripten WebAssembly hasn't implemented "
1278          "__builtin_return_address");
1279     return SDValue();
1280   }
1281 
1282   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
1283     return SDValue();
1284 
1285   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1286   MakeLibCallOptions CallOptions;
1287   return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
1288                      {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
1289       .first;
1290 }
1291 
1292 SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
1293                                                   SelectionDAG &DAG) const {
1294   // Non-zero depths are not supported by WebAssembly currently. Use the
1295   // legalizer's default expansion, which is to return 0 (what this function is
1296   // documented to do).
1297   if (Op.getConstantOperandVal(0) > 0)
1298     return SDValue();
1299 
1300   DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
1301   EVT VT = Op.getValueType();
1302   Register FP =
1303       Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
1304   return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
1305 }
1306 
1307 SDValue
1308 WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
1309                                                  SelectionDAG &DAG) const {
1310   SDLoc DL(Op);
1311   const auto *GA = cast<GlobalAddressSDNode>(Op);
1312   MVT PtrVT = getPointerTy(DAG.getDataLayout());
1313 
1314   MachineFunction &MF = DAG.getMachineFunction();
1315   if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
1316     report_fatal_error("cannot use thread-local storage without bulk memory",
1317                        false);
1318 
1319   const GlobalValue *GV = GA->getGlobal();
1320 
1321   // Currently Emscripten does not support dynamic linking with threads.
1322   // Therefore, if we have thread-local storage, only the local-exec model
1323   // is possible.
1324   // TODO: remove this and implement proper TLS models once Emscripten
1325   // supports dynamic linking with threads.
1326   if (GV->getThreadLocalMode() != GlobalValue::LocalExecTLSModel &&
1327       !Subtarget->getTargetTriple().isOSEmscripten()) {
1328     report_fatal_error("only -ftls-model=local-exec is supported for now on "
1329                        "non-Emscripten OSes: variable " +
1330                            GV->getName(),
1331                        false);
1332   }
1333 
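  // The local-exec address is __tls_base (read with a global.get) plus the
  // variable's offset from the start of the TLS block (MO_TLS_BASE_REL).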
1334   auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
1335                                      : WebAssembly::GLOBAL_GET_I32;
1336   const char *BaseName = MF.createExternalSymbolName("__tls_base");
1337 
1338   SDValue BaseAddr(
1339       DAG.getMachineNode(GlobalGet, DL, PtrVT,
1340                          DAG.getTargetExternalSymbol(BaseName, PtrVT)),
1341       0);
1342 
1343   SDValue TLSOffset = DAG.getTargetGlobalAddress(
1344       GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL);
1345   SDValue SymAddr = DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, TLSOffset);
1346 
1347   return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymAddr);
1348 }
1349 
1350 SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
1351                                                       SelectionDAG &DAG) const {
1352   SDLoc DL(Op);
1353   const auto *GA = cast<GlobalAddressSDNode>(Op);
1354   EVT VT = Op.getValueType();
1355   assert(GA->getTargetFlags() == 0 &&
1356          "Unexpected target flags on generic GlobalAddressSDNode");
1357   if (GA->getAddressSpace() != 0)
1358     fail(DL, DAG, "WebAssembly only expects the 0 address space");
1359 
1360   unsigned OperandFlags = 0;
1361   if (isPositionIndependent()) {
1362     const GlobalValue *GV = GA->getGlobal();
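    // DSO-local symbols can be addressed as a known offset from __memory_base
    // (for data) or __table_base (for functions); everything else is accessed
    // through the GOT via an MO_GOT reference below.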
1363     if (getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) {
1364       MachineFunction &MF = DAG.getMachineFunction();
1365       MVT PtrVT = getPointerTy(MF.getDataLayout());
1366       const char *BaseName;
1367       if (GV->getValueType()->isFunctionTy()) {
1368         BaseName = MF.createExternalSymbolName("__table_base");
1369         OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL;
1370       }
1371       else {
1372         BaseName = MF.createExternalSymbolName("__memory_base");
1373         OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL;
1374       }
1375       SDValue BaseAddr =
1376           DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
1377                       DAG.getTargetExternalSymbol(BaseName, PtrVT));
1378 
1379       SDValue SymAddr = DAG.getNode(
1380           WebAssemblyISD::WrapperPIC, DL, VT,
1381           DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
1382                                      OperandFlags));
1383 
1384       return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
1385     } else {
1386       OperandFlags = WebAssemblyII::MO_GOT;
1387     }
1388   }
1389 
1390   return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
1391                      DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
1392                                                 GA->getOffset(), OperandFlags));
1393 }
1394 
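// External symbols are simply wrapped in a WebAssemblyISD::Wrapper node so
// the resulting TargetExternalSymbol can be matched during selection.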
1395 SDValue
1396 WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
1397                                                SelectionDAG &DAG) const {
1398   SDLoc DL(Op);
1399   const auto *ES = cast<ExternalSymbolSDNode>(Op);
1400   EVT VT = Op.getValueType();
1401   assert(ES->getTargetFlags() == 0 &&
1402          "Unexpected target flags on generic ExternalSymbolSDNode");
1403   return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
1404                      DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
1405 }
1406 
1407 SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
1408                                                   SelectionDAG &DAG) const {
1409   // There's no need for a Wrapper node because we always incorporate a jump
1410   // table operand into a BR_TABLE instruction, rather than ever
1411   // materializing it in a register.
1412   const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
1413   return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
1414                                 JT->getTargetFlags());
1415 }
1416 
1417 SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
1418                                               SelectionDAG &DAG) const {
1419   SDLoc DL(Op);
1420   SDValue Chain = Op.getOperand(0);
1421   const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
1422   SDValue Index = Op.getOperand(2);
1423   assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
1424 
1425   SmallVector<SDValue, 8> Ops;
1426   Ops.push_back(Chain);
1427   Ops.push_back(Index);
1428 
1429   MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
1430   const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
1431 
1432   // Add an operand for each case.
1433   for (auto MBB : MBBs)
1434     Ops.push_back(DAG.getBasicBlock(MBB));
1435 
1436   // Add the first MBB as a dummy default target for now. This will be replaced
1437   // with the proper default target (and the preceding range check eliminated)
1438   // if possible by WebAssemblyFixBrTableDefaults.
1439   Ops.push_back(DAG.getBasicBlock(*MBBs.begin()));
1440   return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
1441 }
1442 
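// va_start stores the address of the vararg buffer, held in the function's
// vararg buffer virtual register, into the va_list object pointed to by the
// pointer operand.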
1443 SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
1444                                                 SelectionDAG &DAG) const {
1445   SDLoc DL(Op);
1446   EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());
1447 
1448   auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
1449   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1450 
1451   SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
1452                                     MFI->getVarargBufferVreg(), PtrVT);
1453   return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
1454                       MachinePointerInfo(SV));
1455 }
1456 
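// Build the wrapped symbol node for __cpp_exception, the tag used by the
// exception-handling intrinsics lowered below.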
1457 static SDValue getCppExceptionSymNode(SDValue Op, unsigned TagIndex,
1458                                       SelectionDAG &DAG) {
1459   // We only support C++ exceptions for now
1460   int Tag =
1461       cast<ConstantSDNode>(Op.getOperand(TagIndex).getNode())->getZExtValue();
1462   if (Tag != WebAssembly::CPP_EXCEPTION)
1463     llvm_unreachable("Invalid tag: We only support C++ exceptions for now");
1464   auto &MF = DAG.getMachineFunction();
1465   const auto &TLI = DAG.getTargetLoweringInfo();
1466   MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
1467   const char *SymName = MF.createExternalSymbolName("__cpp_exception");
1468   return DAG.getNode(WebAssemblyISD::Wrapper, SDLoc(Op), PtrVT,
1469                      DAG.getTargetExternalSymbol(SymName, PtrVT));
1470 }
1471 
1472 SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
1473                                                   SelectionDAG &DAG) const {
1474   MachineFunction &MF = DAG.getMachineFunction();
1475   unsigned IntNo;
1476   switch (Op.getOpcode()) {
1477   case ISD::INTRINSIC_VOID:
1478   case ISD::INTRINSIC_W_CHAIN:
1479     IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1480     break;
1481   case ISD::INTRINSIC_WO_CHAIN:
1482     IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1483     break;
1484   default:
1485     llvm_unreachable("Invalid intrinsic");
1486   }
1487   SDLoc DL(Op);
1488 
1489   switch (IntNo) {
1490   default:
1491     return SDValue(); // Don't custom lower most intrinsics.
1492 
1493   case Intrinsic::wasm_lsda: {
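    // Return the address of this function's language-specific data area,
    // i.e. the GCC_except_table<function number> symbol emitted for it.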
1494     EVT VT = Op.getValueType();
1495     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1496     MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
1497     auto &Context = MF.getMMI().getContext();
1498     MCSymbol *S = Context.getOrCreateSymbol(Twine("GCC_except_table") +
1499                                             Twine(MF.getFunctionNumber()));
1500     return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
1501                        DAG.getMCSymbol(S, PtrVT));
1502   }
1503 
1504   case Intrinsic::wasm_throw: {
1505     SDValue SymNode = getCppExceptionSymNode(Op, 2, DAG);
1506     return DAG.getNode(WebAssemblyISD::THROW, DL,
1507                        MVT::Other, // outchain type
1508                        {
1509                            Op.getOperand(0), // inchain
1510                            SymNode,          // exception symbol
1511                            Op.getOperand(3)  // thrown value
1512                        });
1513   }
1514 
1515   case Intrinsic::wasm_catch: {
1516     SDValue SymNode = getCppExceptionSymNode(Op, 2, DAG);
1517     return DAG.getNode(WebAssemblyISD::CATCH, DL,
1518                        {
                           MVT::i32,  // return value
                           MVT::Other // outchain type
1521                        },
1522                        {
1523                            Op.getOperand(0), // inchain
1524                            SymNode           // exception symbol
1525                        });
1526   }
1527 
1528   case Intrinsic::wasm_shuffle: {
    // Drop the in-chain and replace undef or out-of-range lane indices with
    // zero, but otherwise pass the operands through unchanged.
1530     SDValue Ops[18];
1531     size_t OpIdx = 0;
1532     Ops[OpIdx++] = Op.getOperand(1);
1533     Ops[OpIdx++] = Op.getOperand(2);
1534     while (OpIdx < 18) {
1535       const SDValue &MaskIdx = Op.getOperand(OpIdx + 1);
1536       if (MaskIdx.isUndef() ||
1537           cast<ConstantSDNode>(MaskIdx.getNode())->getZExtValue() >= 32) {
1538         Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32);
1539       } else {
1540         Ops[OpIdx++] = MaskIdx;
1541       }
1542     }
1543     return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
1544   }
1545   }
1546 }
1547 
1548 SDValue
1549 WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
1550                                                   SelectionDAG &DAG) const {
1551   SDLoc DL(Op);
  // If sign extension operations are disabled, allow sext_inreg only if the
  // operand is a vector extract of an i8 or i16 lane. SIMD does not depend on
  // sign extension operations, but allowing sext_inreg in this context lets us
  // have simple patterns to select extract_lane_s instructions. Expanding
  // sext_inreg everywhere would be simpler in this file, but would necessitate
  // large and brittle patterns to undo the expansion and select extract_lane_s
  // instructions.
1559   assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
1560   if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
1561     return SDValue();
1562 
1563   const SDValue &Extract = Op.getOperand(0);
1564   MVT VecT = Extract.getOperand(0).getSimpleValueType();
1565   if (VecT.getVectorElementType().getSizeInBits() > 32)
1566     return SDValue();
1567   MVT ExtractedLaneT =
1568       cast<VTSDNode>(Op.getOperand(1).getNode())->getVT().getSimpleVT();
1569   MVT ExtractedVecT =
1570       MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
1571   if (ExtractedVecT == VecT)
1572     return Op;
1573 
  // Bitcast the vector to the appropriate type to ensure ISel pattern coverage
1575   const SDNode *Index = Extract.getOperand(1).getNode();
1576   if (!isa<ConstantSDNode>(Index))
1577     return SDValue();
1578   unsigned IndexVal = cast<ConstantSDNode>(Index)->getZExtValue();
1579   unsigned Scale =
1580       ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
1581   assert(Scale > 1);
1582   SDValue NewIndex =
1583       DAG.getConstant(IndexVal * Scale, DL, Index->getValueType(0));
1584   SDValue NewExtract = DAG.getNode(
1585       ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(),
1586       DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
1587   return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract,
1588                      Op.getOperand(1));
1589 }
1590 
1591 SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
1592                                                      SelectionDAG &DAG) const {
1593   SDLoc DL(Op);
1594   const EVT VecT = Op.getValueType();
1595   const EVT LaneT = Op.getOperand(0).getValueType();
1596   const size_t Lanes = Op.getNumOperands();
1597   bool CanSwizzle = VecT == MVT::v16i8;
1598 
  // BUILD_VECTORs are lowered to the instruction that initializes the highest
  // possible number of lanes at once, followed by a sequence of replace_lane
  // instructions to individually initialize any remaining lanes.
1602 
1603   // TODO: Tune this. For example, lanewise swizzling is very expensive, so
1604   // swizzled lanes should be given greater weight.
1605 
1606   // TODO: Investigate building vectors by shuffling together vectors built by
1607   // separately specialized means.
1608 
1609   auto IsConstant = [](const SDValue &V) {
1610     return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
1611   };
1612 
1613   // Returns the source vector and index vector pair if they exist. Checks for:
1614   //   (extract_vector_elt
1615   //     $src,
1616   //     (sign_extend_inreg (extract_vector_elt $indices, $i))
1617   //   )
1618   auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
1619     auto Bail = std::make_pair(SDValue(), SDValue());
1620     if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
1621       return Bail;
1622     const SDValue &SwizzleSrc = Lane->getOperand(0);
1623     const SDValue &IndexExt = Lane->getOperand(1);
1624     if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
1625       return Bail;
1626     const SDValue &Index = IndexExt->getOperand(0);
1627     if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
1628       return Bail;
1629     const SDValue &SwizzleIndices = Index->getOperand(0);
1630     if (SwizzleSrc.getValueType() != MVT::v16i8 ||
1631         SwizzleIndices.getValueType() != MVT::v16i8 ||
1632         Index->getOperand(1)->getOpcode() != ISD::Constant ||
1633         Index->getConstantOperandVal(1) != I)
1634       return Bail;
1635     return std::make_pair(SwizzleSrc, SwizzleIndices);
1636   };
1637 
1638   using ValueEntry = std::pair<SDValue, size_t>;
1639   SmallVector<ValueEntry, 16> SplatValueCounts;
1640 
1641   using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
1642   SmallVector<SwizzleEntry, 16> SwizzleCounts;
1643 
1644   auto AddCount = [](auto &Counts, const auto &Val) {
1645     auto CountIt =
1646         llvm::find_if(Counts, [&Val](auto E) { return E.first == Val; });
1647     if (CountIt == Counts.end()) {
1648       Counts.emplace_back(Val, 1);
1649     } else {
1650       CountIt->second++;
1651     }
1652   };
1653 
1654   auto GetMostCommon = [](auto &Counts) {
1655     auto CommonIt =
1656         std::max_element(Counts.begin(), Counts.end(),
1657                          [](auto A, auto B) { return A.second < B.second; });
1658     assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
1659     return *CommonIt;
1660   };
1661 
1662   size_t NumConstantLanes = 0;
1663 
1664   // Count eligible lanes for each type of vector creation op
1665   for (size_t I = 0; I < Lanes; ++I) {
1666     const SDValue &Lane = Op->getOperand(I);
1667     if (Lane.isUndef())
1668       continue;
1669 
1670     AddCount(SplatValueCounts, Lane);
1671 
1672     if (IsConstant(Lane)) {
1673       NumConstantLanes++;
1674     } else if (CanSwizzle) {
1675       auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
1676       if (SwizzleSrcs.first)
1677         AddCount(SwizzleCounts, SwizzleSrcs);
1678     }
1679   }
1680 
1681   SDValue SplatValue;
1682   size_t NumSplatLanes;
1683   std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);
1684 
1685   SDValue SwizzleSrc;
1686   SDValue SwizzleIndices;
1687   size_t NumSwizzleLanes = 0;
1688   if (SwizzleCounts.size())
1689     std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
1690                           NumSwizzleLanes) = GetMostCommon(SwizzleCounts);
1691 
1692   // Predicate returning true if the lane is properly initialized by the
1693   // original instruction
1694   std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
1695   SDValue Result;
1696   // Prefer swizzles over vector consts over splats
1697   if (NumSwizzleLanes >= NumSplatLanes &&
1698       (!Subtarget->hasUnimplementedSIMD128() ||
1699        NumSwizzleLanes >= NumConstantLanes)) {
1700     Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
1701                          SwizzleIndices);
1702     auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
1703     IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
1704       return Swizzled == GetSwizzleSrcs(I, Lane);
1705     };
1706   } else if (NumConstantLanes >= NumSplatLanes &&
1707              Subtarget->hasUnimplementedSIMD128()) {
1708     // If we support v128.const, emit it directly
1709     SmallVector<SDValue, 16> ConstLanes;
1710     for (const SDValue &Lane : Op->op_values()) {
1711       if (IsConstant(Lane)) {
1712         ConstLanes.push_back(Lane);
1713       } else if (LaneT.isFloatingPoint()) {
1714         ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
1715       } else {
1716         ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
1717       }
1718     }
1719     Result = DAG.getBuildVector(VecT, DL, ConstLanes);
1720     IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
1721       return IsConstant(Lane);
1722     };
1723   } else if (NumConstantLanes >= NumSplatLanes && VecT.isInteger()) {
1724     // Otherwise, if this is an integer vector, pack the lane values together so
1725     // we can construct the 128-bit constant from a pair of i64s using a splat
1726     // followed by at most one i64x2.replace_lane. Also keep track of the lanes
1727     // that actually matter so we can avoid the replace_lane in more cases.
1728     std::array<uint64_t, 2> I64s{{0, 0}};
1729     std::array<uint64_t, 2> ConstLaneMasks{{0, 0}};
1730     size_t LaneBits = 128 / Lanes;
1731     size_t HalfLanes = Lanes / 2;
1732     for (size_t I = 0; I < Lanes; ++I) {
1733       const SDValue &Lane = Op.getOperand(I);
1734       if (IsConstant(Lane)) {
1735         // How much we need to shift Val to position it in an i64
1736         auto Shift = LaneBits * (I % HalfLanes);
1737         auto Mask = maskTrailingOnes<uint64_t>(LaneBits);
1738         auto Val = cast<ConstantSDNode>(Lane.getNode())->getZExtValue() & Mask;
1739         I64s[I / HalfLanes] |= Val << Shift;
1740         ConstLaneMasks[I / HalfLanes] |= Mask << Shift;
1741       }
1742     }
1743     // Check whether all constant lanes in the second half of the vector are
1744     // equivalent in the first half or vice versa to determine whether splatting
1745     // either side will be sufficient to materialize the constant. As a special
1746     // case, if the first and second halves have no constant lanes in common, we
1747     // can just combine them.
1748     bool FirstHalfSufficient = (I64s[0] & ConstLaneMasks[1]) == I64s[1];
1749     bool SecondHalfSufficient = (I64s[1] & ConstLaneMasks[0]) == I64s[0];
1750     bool CombinedSufficient = (ConstLaneMasks[0] & ConstLaneMasks[1]) == 0;
1751 
1752     uint64_t Splatted;
1753     if (SecondHalfSufficient) {
1754       Splatted = I64s[1];
1755     } else if (CombinedSufficient) {
1756       Splatted = I64s[0] | I64s[1];
1757     } else {
1758       Splatted = I64s[0];
1759     }
1760 
1761     Result = DAG.getSplatBuildVector(MVT::v2i64, DL,
1762                                      DAG.getConstant(Splatted, DL, MVT::i64));
1763     if (!FirstHalfSufficient && !SecondHalfSufficient && !CombinedSufficient) {
1764       Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i64, Result,
1765                            DAG.getConstant(I64s[1], DL, MVT::i64),
1766                            DAG.getConstant(1, DL, MVT::i32));
1767     }
1768     Result = DAG.getBitcast(VecT, Result);
1769     IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
1770       return IsConstant(Lane);
1771     };
1772   } else {
1773     // Use a splat, but possibly a load_splat
1774     LoadSDNode *SplattedLoad;
1775     if ((SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) &&
1776         SplattedLoad->getMemoryVT() == VecT.getVectorElementType()) {
1777       Result = DAG.getMemIntrinsicNode(
1778           WebAssemblyISD::LOAD_SPLAT, DL, DAG.getVTList(VecT),
1779           {SplattedLoad->getChain(), SplattedLoad->getBasePtr(),
1780            SplattedLoad->getOffset()},
1781           SplattedLoad->getMemoryVT(), SplattedLoad->getMemOperand());
1782     } else {
1783       Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
1784     }
1785     IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
1786       return Lane == SplatValue;
1787     };
1788   }
1789 
1790   assert(Result);
1791   assert(IsLaneConstructed);
1792 
1793   // Add replace_lane instructions for any unhandled values
1794   for (size_t I = 0; I < Lanes; ++I) {
1795     const SDValue &Lane = Op->getOperand(I);
1796     if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
1797       Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
1798                            DAG.getConstant(I, DL, MVT::i32));
1799   }
1800 
1801   return Result;
1802 }
1803 
1804 SDValue
1805 WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
1806                                                SelectionDAG &DAG) const {
1807   SDLoc DL(Op);
1808   ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
1809   MVT VecType = Op.getOperand(0).getSimpleValueType();
1810   assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
1811   size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;
1812 
1813   // Space for two vector args and sixteen mask indices
1814   SDValue Ops[18];
1815   size_t OpIdx = 0;
1816   Ops[OpIdx++] = Op.getOperand(0);
1817   Ops[OpIdx++] = Op.getOperand(1);
1818 
1819   // Expand mask indices to byte indices and materialize them as operands
1820   for (int M : Mask) {
1821     for (size_t J = 0; J < LaneBytes; ++J) {
1822       // Lower undefs (represented by -1 in mask) to zero
1823       uint64_t ByteIndex = M == -1 ? 0 : (uint64_t)M * LaneBytes + J;
1824       Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
1825     }
1826   }
1827 
1828   return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
1829 }
1830 
1831 SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
1832                                               SelectionDAG &DAG) const {
1833   SDLoc DL(Op);
  // The legalizer does not know how to expand the comparison modes of i64x2
  // vectors because no i64x2 comparison instructions are available. We could
  // solve this by expanding all i64x2 SETCC nodes, but that seems to expand
  // f64x2 SETCC nodes (which return i64x2 results) as well. So instead we
  // manually unroll i64x2 comparisons here.
1839   assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
1840   SmallVector<SDValue, 2> LHS, RHS;
1841   DAG.ExtractVectorElements(Op->getOperand(0), LHS);
1842   DAG.ExtractVectorElements(Op->getOperand(1), RHS);
1843   const SDValue &CC = Op->getOperand(2);
1844   auto MakeLane = [&](unsigned I) {
1845     return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I],
1846                        DAG.getConstant(uint64_t(-1), DL, MVT::i64),
1847                        DAG.getConstant(uint64_t(0), DL, MVT::i64), CC);
1848   };
1849   return DAG.getBuildVector(Op->getValueType(0), DL,
1850                             {MakeLane(0), MakeLane(1)});
1851 }
1852 
1853 SDValue
1854 WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
1855                                                     SelectionDAG &DAG) const {
1856   // Allow constant lane indices, expand variable lane indices
1857   SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
1858   if (isa<ConstantSDNode>(IdxNode) || IdxNode->isUndef())
1859     return Op;
1860   else
1861     // Perform default expansion
1862     return SDValue();
1863 }
1864 
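// Unroll a vector shift into per-lane scalar shifts. Used when the shift
// amount is not a splat and so cannot be selected as a single WebAssembly
// shift instruction.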
1865 static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
1866   EVT LaneT = Op.getSimpleValueType().getVectorElementType();
1867   // 32-bit and 64-bit unrolled shifts will have proper semantics
1868   if (LaneT.bitsGE(MVT::i32))
1869     return DAG.UnrollVectorOp(Op.getNode());
  // Otherwise mask the shift value to get proper semantics from a 32-bit shift
1871   SDLoc DL(Op);
1872   size_t NumLanes = Op.getSimpleValueType().getVectorNumElements();
1873   SDValue Mask = DAG.getConstant(LaneT.getSizeInBits() - 1, DL, MVT::i32);
1874   unsigned ShiftOpcode = Op.getOpcode();
1875   SmallVector<SDValue, 16> ShiftedElements;
1876   DAG.ExtractVectorElements(Op.getOperand(0), ShiftedElements, 0, 0, MVT::i32);
1877   SmallVector<SDValue, 16> ShiftElements;
1878   DAG.ExtractVectorElements(Op.getOperand(1), ShiftElements, 0, 0, MVT::i32);
1879   SmallVector<SDValue, 16> UnrolledOps;
1880   for (size_t i = 0; i < NumLanes; ++i) {
1881     SDValue MaskedShiftValue =
1882         DAG.getNode(ISD::AND, DL, MVT::i32, ShiftElements[i], Mask);
1883     SDValue ShiftedValue = ShiftedElements[i];
1884     if (ShiftOpcode == ISD::SRA)
1885       ShiftedValue = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32,
1886                                  ShiftedValue, DAG.getValueType(LaneT));
1887     UnrolledOps.push_back(
1888         DAG.getNode(ShiftOpcode, DL, MVT::i32, ShiftedValue, MaskedShiftValue));
1889   }
1890   return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps);
1891 }
1892 
1893 SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
1894                                               SelectionDAG &DAG) const {
1895   SDLoc DL(Op);
1896 
1897   // Only manually lower vector shifts
1898   assert(Op.getSimpleValueType().isVector());
1899 
1900   auto ShiftVal = DAG.getSplatValue(Op.getOperand(1));
1901   if (!ShiftVal)
1902     return unrollVectorShift(Op, DAG);
1903 
1904   // Use anyext because none of the high bits can affect the shift
1905   ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32);
1906 
1907   unsigned Opcode;
1908   switch (Op.getOpcode()) {
1909   case ISD::SHL:
1910     Opcode = WebAssemblyISD::VEC_SHL;
1911     break;
1912   case ISD::SRA:
1913     Opcode = WebAssemblyISD::VEC_SHR_S;
1914     break;
1915   case ISD::SRL:
1916     Opcode = WebAssemblyISD::VEC_SHR_U;
1917     break;
1918   default:
1919     llvm_unreachable("unexpected opcode");
1920   }
1921 
1922   return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal);
1923 }
1924 
1925 //===----------------------------------------------------------------------===//
1926 //   Custom DAG combine hooks
1927 //===----------------------------------------------------------------------===//
1928 static SDValue
1929 performVECTOR_SHUFFLECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
1930   auto &DAG = DCI.DAG;
1931   auto Shuffle = cast<ShuffleVectorSDNode>(N);
1932 
1933   // Hoist vector bitcasts that don't change the number of lanes out of unary
1934   // shuffles, where they are less likely to get in the way of other combines.
1935   // (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) ->
1936   //  (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask))))
1937   SDValue Bitcast = N->getOperand(0);
1938   if (Bitcast.getOpcode() != ISD::BITCAST)
1939     return SDValue();
1940   if (!N->getOperand(1).isUndef())
1941     return SDValue();
1942   SDValue CastOp = Bitcast.getOperand(0);
1943   MVT SrcType = CastOp.getSimpleValueType();
1944   MVT DstType = Bitcast.getSimpleValueType();
1945   if (!SrcType.is128BitVector() ||
1946       SrcType.getVectorNumElements() != DstType.getVectorNumElements())
1947     return SDValue();
1948   SDValue NewShuffle = DAG.getVectorShuffle(
1949       SrcType, SDLoc(N), CastOp, DAG.getUNDEF(SrcType), Shuffle->getMask());
1950   return DAG.getBitcast(DstType, NewShuffle);
1951 }
1952 
1953 static SDValue performVectorWidenCombine(SDNode *N,
1954                                          TargetLowering::DAGCombinerInfo &DCI) {
1955   auto &DAG = DCI.DAG;
1956   assert(N->getOpcode() == ISD::SIGN_EXTEND ||
1957          N->getOpcode() == ISD::ZERO_EXTEND);
1958 
1959   // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if
1960   // possible before the extract_subvector can be expanded.
1961   auto Extract = N->getOperand(0);
1962   if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
1963     return SDValue();
1964   auto Source = Extract.getOperand(0);
1965   auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
1966   if (IndexNode == nullptr)
1967     return SDValue();
1968   auto Index = IndexNode->getZExtValue();
1969 
1970   // Only v8i8 and v4i16 extracts can be widened, and only if the extracted
1971   // subvector is the low or high half of its source.
1972   EVT ResVT = N->getValueType(0);
1973   if (ResVT == MVT::v8i16) {
1974     if (Extract.getValueType() != MVT::v8i8 ||
1975         Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8))
1976       return SDValue();
1977   } else if (ResVT == MVT::v4i32) {
1978     if (Extract.getValueType() != MVT::v4i16 ||
1979         Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4))
1980       return SDValue();
1981   } else {
1982     return SDValue();
1983   }
1984 
1985   bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND;
1986   bool IsLow = Index == 0;
1987 
1988   unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::WIDEN_LOW_S
1989                                 : WebAssemblyISD::WIDEN_HIGH_S)
1990                        : (IsLow ? WebAssemblyISD::WIDEN_LOW_U
1991                                 : WebAssemblyISD::WIDEN_HIGH_U);
1992 
1993   return DAG.getNode(Op, SDLoc(N), ResVT, Source);
1994 }
1995 
1996 SDValue
1997 WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
1998                                              DAGCombinerInfo &DCI) const {
1999   switch (N->getOpcode()) {
2000   default:
2001     return SDValue();
2002   case ISD::VECTOR_SHUFFLE:
2003     return performVECTOR_SHUFFLECombine(N, DCI);
2004   case ISD::SIGN_EXTEND:
2005   case ISD::ZERO_EXTEND:
2006     return performVectorWidenCombine(N, DCI);
2007   }
2008 }
2009