1 //=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements the WebAssemblyTargetLowering class.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "WebAssemblyISelLowering.h"
15 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
16 #include "WebAssemblyMachineFunctionInfo.h"
17 #include "WebAssemblySubtarget.h"
18 #include "WebAssemblyTargetMachine.h"
19 #include "llvm/CodeGen/Analysis.h"
20 #include "llvm/CodeGen/CallingConvLower.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/CodeGen/MachineJumpTableInfo.h"
23 #include "llvm/CodeGen/MachineModuleInfo.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/CodeGen/SelectionDAG.h"
26 #include "llvm/CodeGen/WasmEHFuncInfo.h"
27 #include "llvm/IR/DiagnosticInfo.h"
28 #include "llvm/IR/DiagnosticPrinter.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/Intrinsics.h"
31 #include "llvm/IR/IntrinsicsWebAssembly.h"
32 #include "llvm/Support/Debug.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/Support/MathExtras.h"
35 #include "llvm/Support/raw_ostream.h"
36 #include "llvm/Target/TargetOptions.h"
37 using namespace llvm;
38 
39 #define DEBUG_TYPE "wasm-lower"
40 
41 WebAssemblyTargetLowering::WebAssemblyTargetLowering(
42     const TargetMachine &TM, const WebAssemblySubtarget &STI)
43     : TargetLowering(TM), Subtarget(&STI) {
44   auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;
45 
46   // Booleans always contain 0 or 1.
47   setBooleanContents(ZeroOrOneBooleanContent);
48   // Except in SIMD vectors
49   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
50   // We don't know the microarchitecture here, so just reduce register pressure.
51   setSchedulingPreference(Sched::RegPressure);
52   // Tell ISel that we have a stack pointer.
53   setStackPointerRegisterToSaveRestore(
54       Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
55   // Set up the register classes.
56   addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
57   addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
58   addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
59   addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
60   if (Subtarget->hasSIMD128()) {
61     addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
62     addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
63     addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
64     addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
65     addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
66     addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
67   }
68   // Compute derived properties from the register classes.
69   computeRegisterProperties(Subtarget->getRegisterInfo());
70 
71   setOperationAction(ISD::GlobalAddress, MVTPtr, Custom);
72   setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom);
73   setOperationAction(ISD::JumpTable, MVTPtr, Custom);
74   setOperationAction(ISD::BlockAddress, MVTPtr, Custom);
75   setOperationAction(ISD::BRIND, MVT::Other, Custom);
76 
  // Take the default expansion for va_arg, va_copy, and va_end. There is no
  // default action for va_start, so we lower it with custom code.
79   setOperationAction(ISD::VASTART, MVT::Other, Custom);
80   setOperationAction(ISD::VAARG, MVT::Other, Expand);
81   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
82   setOperationAction(ISD::VAEND, MVT::Other, Expand);
83 
84   for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
85     // Don't expand the floating-point types to constant pools.
86     setOperationAction(ISD::ConstantFP, T, Legal);
87     // Expand floating-point comparisons.
88     for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
89                     ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
90       setCondCodeAction(CC, T, Expand);
91     // Expand floating-point library function operators.
92     for (auto Op :
93          {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FMA})
94       setOperationAction(Op, T, Expand);
    // Mark as legal the supported floating-point library function operators
    // that otherwise default to expand.
97     for (auto Op :
98          {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT})
99       setOperationAction(Op, T, Legal);
100     // Support minimum and maximum, which otherwise default to expand.
101     setOperationAction(ISD::FMINIMUM, T, Legal);
102     setOperationAction(ISD::FMAXIMUM, T, Legal);
103     // WebAssembly currently has no builtin f16 support.
104     setOperationAction(ISD::FP16_TO_FP, T, Expand);
105     setOperationAction(ISD::FP_TO_FP16, T, Expand);
106     setLoadExtAction(ISD::EXTLOAD, T, MVT::f16, Expand);
107     setTruncStoreAction(T, MVT::f16, Expand);
108   }
109 
110   // Expand unavailable integer operations.
111   for (auto Op :
112        {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU,
113         ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS, ISD::SRA_PARTS,
114         ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}) {
115     for (auto T : {MVT::i32, MVT::i64})
116       setOperationAction(Op, T, Expand);
117     if (Subtarget->hasSIMD128())
118       for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
119         setOperationAction(Op, T, Expand);
120   }
121 
122   // SIMD-specific configuration
123   if (Subtarget->hasSIMD128()) {
124     // Hoist bitcasts out of shuffles
125     setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
126 
127     // Combine extends of extract_subvectors into widening ops
128     setTargetDAGCombine(ISD::SIGN_EXTEND);
129     setTargetDAGCombine(ISD::ZERO_EXTEND);
130 
131     // Support saturating add for i8x16 and i16x8
132     for (auto Op : {ISD::SADDSAT, ISD::UADDSAT})
133       for (auto T : {MVT::v16i8, MVT::v8i16})
134         setOperationAction(Op, T, Legal);
135 
136     // Support integer abs
137     for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
138       setOperationAction(ISD::ABS, T, Legal);
139 
140     // Custom lower BUILD_VECTORs to minimize number of replace_lanes
141     for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
142                    MVT::v2f64})
143       setOperationAction(ISD::BUILD_VECTOR, T, Custom);
144 
145     // We have custom shuffle lowering to expose the shuffle mask
146     for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
147                    MVT::v2f64})
148       setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);
149 
150     // Custom lowering since wasm shifts must have a scalar shift amount
151     for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
152       for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
153         setOperationAction(Op, T, Custom);
154 
155     // Custom lower lane accesses to expand out variable indices
156     for (auto Op : {ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT})
157       for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
158                      MVT::v2f64})
159         setOperationAction(Op, T, Custom);
160 
161     // There is no i8x16.mul instruction
162     setOperationAction(ISD::MUL, MVT::v16i8, Expand);
163 
164     // There is no vector conditional select instruction
165     for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
166                    MVT::v2f64})
167       setOperationAction(ISD::SELECT_CC, T, Expand);
168 
169     // Expand integer operations supported for scalars but not SIMD
170     for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP, ISD::SDIV, ISD::UDIV,
171                     ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR})
172       for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
173         setOperationAction(Op, T, Expand);
174 
175     // But we do have integer min and max operations
176     for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
177       for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
178         setOperationAction(Op, T, Legal);
179 
180     // Expand float operations supported for scalars but not SIMD
181     for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT,
182                     ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
183                     ISD::FEXP, ISD::FEXP2, ISD::FRINT})
184       for (auto T : {MVT::v4f32, MVT::v2f64})
185         setOperationAction(Op, T, Expand);
186 
    // i64x2 comparisons are not natively supported, so custom lower them
188     for (unsigned CC = 0; CC < ISD::SETCC_INVALID; ++CC)
189       setCondCodeAction(static_cast<ISD::CondCode>(CC), MVT::v2i64, Custom);
190 
191     // 64x2 conversions are not in the spec
192     for (auto Op :
193          {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT})
194       for (auto T : {MVT::v2i64, MVT::v2f64})
195         setOperationAction(Op, T, Expand);
196   }
197 
198   // As a special case, these operators use the type to mean the type to
199   // sign-extend from.
200   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
201   if (!Subtarget->hasSignExt()) {
202     // Sign extends are legal only when extending a vector extract
203     auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
204     for (auto T : {MVT::i8, MVT::i16, MVT::i32})
205       setOperationAction(ISD::SIGN_EXTEND_INREG, T, Action);
206   }
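  // SIGN_EXTEND_INREG of vectors is always expanded, whether or not the
  // sign-ext feature is available (vector lowering handles these separately).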
207   for (auto T : MVT::integer_fixedlen_vector_valuetypes())
208     setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand);
209 
210   // Dynamic stack allocation: use the default expansion.
211   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
212   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
213   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand);
214 
215   setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
216   setOperationAction(ISD::FrameIndex, MVT::i64, Custom);
217   setOperationAction(ISD::CopyToReg, MVT::Other, Custom);
218 
219   // Expand these forms; we pattern-match the forms that we can handle in isel.
220   for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
221     for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
222       setOperationAction(Op, T, Expand);
223 
224   // We have custom switch handling.
225   setOperationAction(ISD::BR_JT, MVT::Other, Custom);
226 
227   // WebAssembly doesn't have:
228   //  - Floating-point extending loads.
229   //  - Floating-point truncating stores.
230   //  - i1 extending loads.
  //  - Truncating SIMD stores and most extending loads.
232   setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
233   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
234   for (auto T : MVT::integer_valuetypes())
235     for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
236       setLoadExtAction(Ext, T, MVT::i1, Promote);
237   if (Subtarget->hasSIMD128()) {
238     for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
239                    MVT::v2f64}) {
240       for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
241         if (MVT(T) != MemT) {
242           setTruncStoreAction(T, MemT, Expand);
243           for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
244             setLoadExtAction(Ext, T, MemT, Expand);
245         }
246       }
247     }
248     // But some vector extending loads are legal
249     for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
250       setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
251       setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
252       setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
253     }
254     // And some truncating stores are legal as well
255     setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
256     setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
257   }
258 
259   // Don't do anything clever with build_pairs
260   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
261 
262   // Trap lowers to wasm unreachable
263   setOperationAction(ISD::TRAP, MVT::Other, Legal);
264   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
265 
266   // Exception handling intrinsics
267   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
268   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
269 
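  // Atomic operations wider than 64 bits are not supported natively; with
  // this limit, AtomicExpandPass lowers them to __atomic_* libcalls.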
270   setMaxAtomicSizeInBitsSupported(64);
271 
272   // Override the __gnu_f2h_ieee/__gnu_h2f_ieee names so that the f32 name is
273   // consistent with the f64 and f128 names.
274   setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
275   setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
276 
277   // Define the emscripten name for return address helper.
278   // TODO: when implementing other Wasm backends, make this generic or only do
279   // this on emscripten depending on what they end up doing.
280   setLibcallName(RTLIB::RETURN_ADDRESS, "emscripten_return_address");
281 
282   // Always convert switches to br_tables unless there is only one case, which
283   // is equivalent to a simple branch. This reduces code size for wasm, and we
284   // defer possible jump table optimizations to the VM.
285   setMinimumJumpTableEntries(2);
286 }
287 
288 TargetLowering::AtomicExpansionKind
289 WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
290   // We have wasm instructions for these
291   switch (AI->getOperation()) {
292   case AtomicRMWInst::Add:
293   case AtomicRMWInst::Sub:
294   case AtomicRMWInst::And:
295   case AtomicRMWInst::Or:
296   case AtomicRMWInst::Xor:
297   case AtomicRMWInst::Xchg:
298     return AtomicExpansionKind::None;
299   default:
300     break;
301   }
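  // Everything else (e.g. nand and the min/max variants) has no wasm
  // instruction, so expand it to a cmpxchg loop.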
302   return AtomicExpansionKind::CmpXChg;
303 }
304 
305 FastISel *WebAssemblyTargetLowering::createFastISel(
306     FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
307   return WebAssembly::createFastISel(FuncInfo, LibInfo);
308 }
309 
310 MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
311                                                       EVT VT) const {
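  // Round the size up to a power of two; NextPowerOf2 returns the power of
  // two strictly greater than its argument, so the "- 1" keeps exact powers
  // of two (e.g. 32 and 64) unchanged.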
312   unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
313   if (BitWidth > 1 && BitWidth < 8)
314     BitWidth = 8;
315 
316   if (BitWidth > 64) {
317     // The shift will be lowered to a libcall, and compiler-rt libcalls expect
318     // the count to be an i32.
319     BitWidth = 32;
320     assert(BitWidth >= Log2_32_Ceil(VT.getSizeInBits()) &&
321            "32-bit shift counts ought to be enough for anyone");
322   }
323 
324   MVT Result = MVT::getIntegerVT(BitWidth);
325   assert(Result != MVT::INVALID_SIMPLE_VALUE_TYPE &&
326          "Unable to represent scalar shift amount type");
327   return Result;
328 }
329 
330 // Lower an fp-to-int conversion operator from the LLVM opcode, which has an
331 // undefined result on invalid/overflow, to the WebAssembly opcode, which
332 // traps on invalid/overflow.
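// The emitted control flow is, roughly:
//
//   InRange = IsUnsigned ? (x < 2^N && x >= 0) : (fabs(x) < 2^(N-1))
//   Result  = InRange ? trunc(x)      // FalseMBB: the trapping conversion
//                     : Substitute;   // TrueMBB: 0 (unsigned) or INT_MIN
//   // DoneMBB merges the two values with a PHI.
//
// where N is the destination integer width.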
333 static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
334                                        MachineBasicBlock *BB,
335                                        const TargetInstrInfo &TII,
336                                        bool IsUnsigned, bool Int64,
337                                        bool Float64, unsigned LoweredOpcode) {
338   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
339 
340   Register OutReg = MI.getOperand(0).getReg();
341   Register InReg = MI.getOperand(1).getReg();
342 
343   unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
344   unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
345   unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
346   unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
347   unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
348   unsigned Eqz = WebAssembly::EQZ_I32;
349   unsigned And = WebAssembly::AND_I32;
350   int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
351   int64_t Substitute = IsUnsigned ? 0 : Limit;
352   double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
353   auto &Context = BB->getParent()->getFunction().getContext();
354   Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);
355 
356   const BasicBlock *LLVMBB = BB->getBasicBlock();
357   MachineFunction *F = BB->getParent();
358   MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
359   MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB);
360   MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
361 
362   MachineFunction::iterator It = ++BB->getIterator();
363   F->insert(It, FalseMBB);
364   F->insert(It, TrueMBB);
365   F->insert(It, DoneMBB);
366 
367   // Transfer the remainder of BB and its successor edges to DoneMBB.
368   DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
369   DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
370 
371   BB->addSuccessor(TrueMBB);
372   BB->addSuccessor(FalseMBB);
373   TrueMBB->addSuccessor(DoneMBB);
374   FalseMBB->addSuccessor(DoneMBB);
375 
376   unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
377   Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
378   Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
379   CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
380   EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
381   FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
382   TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
383 
384   MI.eraseFromParent();
385   // For signed numbers, we can do a single comparison to determine whether
386   // fabs(x) is within range.
387   if (IsUnsigned) {
388     Tmp0 = InReg;
389   } else {
390     BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
391   }
392   BuildMI(BB, DL, TII.get(FConst), Tmp1)
393       .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
394   BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);
395 
396   // For unsigned numbers, we have to do a separate comparison with zero.
397   if (IsUnsigned) {
398     Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
399     Register SecondCmpReg =
400         MRI.createVirtualRegister(&WebAssembly::I32RegClass);
401     Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
402     BuildMI(BB, DL, TII.get(FConst), Tmp1)
403         .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
404     BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
405     BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
406     CmpReg = AndReg;
407   }
408 
409   BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);
410 
411   // Create the CFG diamond to select between doing the conversion or using
412   // the substitute value.
413   BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
414   BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
415   BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
416   BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
417   BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
418       .addReg(FalseReg)
419       .addMBB(FalseMBB)
420       .addReg(TrueReg)
421       .addMBB(TrueMBB);
422 
423   return DoneMBB;
424 }
425 
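// Instruction selection emits calls as a CALL_PARAMS / CALL_RESULTS pair of
// pseudo-instructions; this inserter fuses such a pair back into a single
// CALL, CALL_INDIRECT, RET_CALL, or RET_CALL_INDIRECT instruction.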
426 static MachineBasicBlock *LowerCallResults(MachineInstr &CallResults,
427                                            DebugLoc DL, MachineBasicBlock *BB,
428                                            const TargetInstrInfo &TII) {
429   MachineInstr &CallParams = *CallResults.getPrevNode();
430   assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS);
431   assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS ||
432          CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS);
433 
434   bool IsIndirect = CallParams.getOperand(0).isReg();
435   bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS;
436 
437   unsigned CallOp;
438   if (IsIndirect && IsRetCall) {
439     CallOp = WebAssembly::RET_CALL_INDIRECT;
440   } else if (IsIndirect) {
441     CallOp = WebAssembly::CALL_INDIRECT;
442   } else if (IsRetCall) {
443     CallOp = WebAssembly::RET_CALL;
444   } else {
445     CallOp = WebAssembly::CALL;
446   }
447 
448   MachineFunction &MF = *BB->getParent();
449   const MCInstrDesc &MCID = TII.get(CallOp);
450   MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));
451 
452   // See if we must truncate the function pointer.
453   // CALL_INDIRECT takes an i32, but in wasm64 we represent function pointers
454   // as 64-bit for uniformity with other pointer types.
455   if (IsIndirect && MF.getSubtarget<WebAssemblySubtarget>().hasAddr64()) {
456     Register Reg32 =
457         MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
458     auto &FnPtr = CallParams.getOperand(0);
459     BuildMI(*BB, CallResults.getIterator(), DL,
460             TII.get(WebAssembly::I32_WRAP_I64), Reg32)
461         .addReg(FnPtr.getReg());
462     FnPtr.setReg(Reg32);
463   }
464 
465   // Move the function pointer to the end of the arguments for indirect calls
466   if (IsIndirect) {
467     auto FnPtr = CallParams.getOperand(0);
468     CallParams.RemoveOperand(0);
469     CallParams.addOperand(FnPtr);
470   }
471 
472   for (auto Def : CallResults.defs())
473     MIB.add(Def);
474 
475   // Add placeholders for the type index and immediate flags
476   if (IsIndirect) {
477     MIB.addImm(0);
478     MIB.addImm(0);
479   }
480 
481   for (auto Use : CallParams.uses())
482     MIB.add(Use);
483 
484   BB->insert(CallResults.getIterator(), MIB);
485   CallParams.eraseFromParent();
486   CallResults.eraseFromParent();
487 
488   return BB;
489 }
490 
491 MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
492     MachineInstr &MI, MachineBasicBlock *BB) const {
493   const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
494   DebugLoc DL = MI.getDebugLoc();
495 
496   switch (MI.getOpcode()) {
497   default:
498     llvm_unreachable("Unexpected instr type to insert");
499   case WebAssembly::FP_TO_SINT_I32_F32:
500     return LowerFPToInt(MI, DL, BB, TII, false, false, false,
501                         WebAssembly::I32_TRUNC_S_F32);
502   case WebAssembly::FP_TO_UINT_I32_F32:
503     return LowerFPToInt(MI, DL, BB, TII, true, false, false,
504                         WebAssembly::I32_TRUNC_U_F32);
505   case WebAssembly::FP_TO_SINT_I64_F32:
506     return LowerFPToInt(MI, DL, BB, TII, false, true, false,
507                         WebAssembly::I64_TRUNC_S_F32);
508   case WebAssembly::FP_TO_UINT_I64_F32:
509     return LowerFPToInt(MI, DL, BB, TII, true, true, false,
510                         WebAssembly::I64_TRUNC_U_F32);
511   case WebAssembly::FP_TO_SINT_I32_F64:
512     return LowerFPToInt(MI, DL, BB, TII, false, false, true,
513                         WebAssembly::I32_TRUNC_S_F64);
514   case WebAssembly::FP_TO_UINT_I32_F64:
515     return LowerFPToInt(MI, DL, BB, TII, true, false, true,
516                         WebAssembly::I32_TRUNC_U_F64);
517   case WebAssembly::FP_TO_SINT_I64_F64:
518     return LowerFPToInt(MI, DL, BB, TII, false, true, true,
519                         WebAssembly::I64_TRUNC_S_F64);
520   case WebAssembly::FP_TO_UINT_I64_F64:
521     return LowerFPToInt(MI, DL, BB, TII, true, true, true,
522                         WebAssembly::I64_TRUNC_U_F64);
523   case WebAssembly::CALL_RESULTS:
524   case WebAssembly::RET_CALL_RESULTS:
525     return LowerCallResults(MI, DL, BB, TII);
526   }
527 }
528 
529 const char *
530 WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const {
531   switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) {
532   case WebAssemblyISD::FIRST_NUMBER:
533   case WebAssemblyISD::FIRST_MEM_OPCODE:
534     break;
535 #define HANDLE_NODETYPE(NODE)                                                  \
536   case WebAssemblyISD::NODE:                                                   \
537     return "WebAssemblyISD::" #NODE;
538 #define HANDLE_MEM_NODETYPE(NODE) HANDLE_NODETYPE(NODE)
539 #include "WebAssemblyISD.def"
540 #undef HANDLE_MEM_NODETYPE
541 #undef HANDLE_NODETYPE
542   }
543   return nullptr;
544 }
545 
546 std::pair<unsigned, const TargetRegisterClass *>
547 WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
548     const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
549   // First, see if this is a constraint that directly corresponds to a
550   // WebAssembly register class.
551   if (Constraint.size() == 1) {
552     switch (Constraint[0]) {
553     case 'r':
554       assert(VT != MVT::iPTR && "Pointer MVT not expected here");
555       if (Subtarget->hasSIMD128() && VT.isVector()) {
556         if (VT.getSizeInBits() == 128)
557           return std::make_pair(0U, &WebAssembly::V128RegClass);
558       }
559       if (VT.isInteger() && !VT.isVector()) {
560         if (VT.getSizeInBits() <= 32)
561           return std::make_pair(0U, &WebAssembly::I32RegClass);
562         if (VT.getSizeInBits() <= 64)
563           return std::make_pair(0U, &WebAssembly::I64RegClass);
564       }
565       break;
566     default:
567       break;
568     }
569   }
570 
571   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
572 }
573 
574 bool WebAssemblyTargetLowering::isCheapToSpeculateCttz() const {
575   // Assume ctz is a relatively cheap operation.
576   return true;
577 }
578 
579 bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const {
580   // Assume clz is a relatively cheap operation.
581   return true;
582 }
583 
584 bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
585                                                       const AddrMode &AM,
586                                                       Type *Ty, unsigned AS,
587                                                       Instruction *I) const {
  // WebAssembly offsets are added as unsigned without wrapping. The
  // isLegalAddressingMode interface gives us no way to determine whether
  // wrapping could happen, so we approximate by accepting only non-negative
  // offsets.
591   if (AM.BaseOffs < 0)
592     return false;
593 
594   // WebAssembly has no scale register operands.
595   if (AM.Scale != 0)
596     return false;
597 
598   // Everything else is legal.
599   return true;
600 }
601 
602 bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
603     EVT /*VT*/, unsigned /*AddrSpace*/, unsigned /*Align*/,
604     MachineMemOperand::Flags /*Flags*/, bool *Fast) const {
  // WebAssembly supports unaligned accesses, though loads and stores that
  // perform them should declare it with the p2align attribute, and there may
  // be a performance impact. We tell LLVM they're "fast" because, for the
  // kinds of things LLVM uses this for (merging adjacent stores of constants,
  // etc.), WebAssembly implementations will either want the unaligned access
  // or split it anyway.
611   if (Fast)
612     *Fast = true;
613   return true;
614 }
615 
616 bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
617                                               AttributeList Attr) const {
618   // The current thinking is that wasm engines will perform this optimization,
619   // so we can save on code size.
620   return true;
621 }
622 
623 bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
624   EVT ExtT = ExtVal.getValueType();
625   EVT MemT = cast<LoadSDNode>(ExtVal->getOperand(0))->getValueType(0);
626   return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
627          (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
628          (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
629 }
630 
631 EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
632                                                   LLVMContext &C,
633                                                   EVT VT) const {
634   if (VT.isVector())
635     return VT.changeVectorElementTypeToInteger();
636 
637   // So far, all branch instructions in Wasm take an I32 condition.
638   // The default TargetLowering::getSetCCResultType returns the pointer size,
639   // which would be useful to reduce instruction counts when testing
640   // against 64-bit pointers/values if at some point Wasm supports that.
641   return EVT::getIntegerVT(C, 32);
642 }
643 
644 bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
645                                                    const CallInst &I,
646                                                    MachineFunction &MF,
647                                                    unsigned Intrinsic) const {
648   switch (Intrinsic) {
649   case Intrinsic::wasm_atomic_notify:
650     Info.opc = ISD::INTRINSIC_W_CHAIN;
651     Info.memVT = MVT::i32;
652     Info.ptrVal = I.getArgOperand(0);
653     Info.offset = 0;
654     Info.align = Align(4);
    // The atomic.notify instruction does not really load the memory specified
    // by this argument, but a MachineMemOperand must be either a load or a
    // store, so we mark this as a load.
    // FIXME: Volatile isn't really correct, but currently all LLVM atomic
    // instructions are treated as volatile in the backend, so we should be
    // consistent. The same applies to the wasm_atomic_wait intrinsics too.
661     Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
662     return true;
663   case Intrinsic::wasm_atomic_wait_i32:
664     Info.opc = ISD::INTRINSIC_W_CHAIN;
665     Info.memVT = MVT::i32;
666     Info.ptrVal = I.getArgOperand(0);
667     Info.offset = 0;
668     Info.align = Align(4);
669     Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
670     return true;
671   case Intrinsic::wasm_atomic_wait_i64:
672     Info.opc = ISD::INTRINSIC_W_CHAIN;
673     Info.memVT = MVT::i64;
674     Info.ptrVal = I.getArgOperand(0);
675     Info.offset = 0;
676     Info.align = Align(8);
677     Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
678     return true;
679   case Intrinsic::wasm_load32_zero:
680   case Intrinsic::wasm_load64_zero:
681     Info.opc = ISD::INTRINSIC_W_CHAIN;
682     Info.memVT = Intrinsic == Intrinsic::wasm_load32_zero ? MVT::i32 : MVT::i64;
683     Info.ptrVal = I.getArgOperand(0);
684     Info.offset = 0;
685     Info.align = Info.memVT == MVT::i32 ? Align(4) : Align(8);
686     Info.flags = MachineMemOperand::MOLoad;
687     return true;
688   case Intrinsic::wasm_load8_lane:
689   case Intrinsic::wasm_load16_lane:
690   case Intrinsic::wasm_load32_lane:
691   case Intrinsic::wasm_load64_lane:
692   case Intrinsic::wasm_store8_lane:
693   case Intrinsic::wasm_store16_lane:
694   case Intrinsic::wasm_store32_lane:
695   case Intrinsic::wasm_store64_lane: {
696     MVT MemVT;
697     Align MemAlign;
698     switch (Intrinsic) {
699     case Intrinsic::wasm_load8_lane:
700     case Intrinsic::wasm_store8_lane:
701       MemVT = MVT::i8;
702       MemAlign = Align(1);
703       break;
704     case Intrinsic::wasm_load16_lane:
705     case Intrinsic::wasm_store16_lane:
706       MemVT = MVT::i16;
707       MemAlign = Align(2);
708       break;
709     case Intrinsic::wasm_load32_lane:
710     case Intrinsic::wasm_store32_lane:
711       MemVT = MVT::i32;
712       MemAlign = Align(4);
713       break;
714     case Intrinsic::wasm_load64_lane:
715     case Intrinsic::wasm_store64_lane:
716       MemVT = MVT::i64;
717       MemAlign = Align(8);
718       break;
719     default:
720       llvm_unreachable("unexpected intrinsic");
721     }
722     if (Intrinsic == Intrinsic::wasm_load8_lane ||
723         Intrinsic == Intrinsic::wasm_load16_lane ||
724         Intrinsic == Intrinsic::wasm_load32_lane ||
725         Intrinsic == Intrinsic::wasm_load64_lane) {
726       Info.opc = ISD::INTRINSIC_W_CHAIN;
727       Info.flags = MachineMemOperand::MOLoad;
728     } else {
729       Info.opc = ISD::INTRINSIC_VOID;
730       Info.flags = MachineMemOperand::MOStore;
731     }
732     Info.ptrVal = I.getArgOperand(0);
733     Info.memVT = MemVT;
734     Info.offset = 0;
735     Info.align = MemAlign;
736     return true;
737   }
738   default:
739     return false;
740   }
741 }
742 
743 //===----------------------------------------------------------------------===//
744 // WebAssembly Lowering private implementation.
745 //===----------------------------------------------------------------------===//
746 
747 //===----------------------------------------------------------------------===//
748 // Lowering Code
749 //===----------------------------------------------------------------------===//
750 
751 static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
752   MachineFunction &MF = DAG.getMachineFunction();
753   DAG.getContext()->diagnose(
754       DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
755 }
756 
757 // Test whether the given calling convention is supported.
758 static bool callingConvSupported(CallingConv::ID CallConv) {
  // We currently support the language-independent, target-independent
760   // conventions. We don't yet have a way to annotate calls with properties like
761   // "cold", and we don't have any call-clobbered registers, so these are mostly
762   // all handled the same.
763   return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
764          CallConv == CallingConv::Cold ||
765          CallConv == CallingConv::PreserveMost ||
766          CallConv == CallingConv::PreserveAll ||
767          CallConv == CallingConv::CXX_FAST_TLS ||
768          CallConv == CallingConv::WASM_EmscriptenInvoke ||
769          CallConv == CallingConv::Swift;
770 }
771 
772 SDValue
773 WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
774                                      SmallVectorImpl<SDValue> &InVals) const {
775   SelectionDAG &DAG = CLI.DAG;
776   SDLoc DL = CLI.DL;
777   SDValue Chain = CLI.Chain;
778   SDValue Callee = CLI.Callee;
779   MachineFunction &MF = DAG.getMachineFunction();
780   auto Layout = MF.getDataLayout();
781 
782   CallingConv::ID CallConv = CLI.CallConv;
783   if (!callingConvSupported(CallConv))
784     fail(DL, DAG,
785          "WebAssembly doesn't support language-specific or target-specific "
786          "calling conventions yet");
787   if (CLI.IsPatchPoint)
788     fail(DL, DAG, "WebAssembly doesn't support patch point yet");
789 
790   if (CLI.IsTailCall) {
791     auto NoTail = [&](const char *Msg) {
792       if (CLI.CB && CLI.CB->isMustTailCall())
793         fail(DL, DAG, Msg);
794       CLI.IsTailCall = false;
795     };
796 
797     if (!Subtarget->hasTailCall())
798       NoTail("WebAssembly 'tail-call' feature not enabled");
799 
800     // Varargs calls cannot be tail calls because the buffer is on the stack
801     if (CLI.IsVarArg)
802       NoTail("WebAssembly does not support varargs tail calls");
803 
804     // Do not tail call unless caller and callee return types match
805     const Function &F = MF.getFunction();
806     const TargetMachine &TM = getTargetMachine();
807     Type *RetTy = F.getReturnType();
808     SmallVector<MVT, 4> CallerRetTys;
809     SmallVector<MVT, 4> CalleeRetTys;
810     computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
811     computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
812     bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
813                       std::equal(CallerRetTys.begin(), CallerRetTys.end(),
814                                  CalleeRetTys.begin());
815     if (!TypesMatch)
816       NoTail("WebAssembly tail call requires caller and callee return types to "
817              "match");
818 
819     // If pointers to local stack values are passed, we cannot tail call
820     if (CLI.CB) {
821       for (auto &Arg : CLI.CB->args()) {
822         Value *Val = Arg.get();
823         // Trace the value back through pointer operations
824         while (true) {
825           Value *Src = Val->stripPointerCastsAndAliases();
826           if (auto *GEP = dyn_cast<GetElementPtrInst>(Src))
827             Src = GEP->getPointerOperand();
828           if (Val == Src)
829             break;
830           Val = Src;
831         }
832         if (isa<AllocaInst>(Val)) {
833           NoTail(
834               "WebAssembly does not support tail calling with stack arguments");
835           break;
836         }
837       }
838     }
839   }
840 
841   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
842   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
843   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
844 
845   // The generic code may have added an sret argument. If we're lowering an
846   // invoke function, the ABI requires that the function pointer be the first
847   // argument, so we may have to swap the arguments.
848   if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
849       Outs[0].Flags.isSRet()) {
850     std::swap(Outs[0], Outs[1]);
851     std::swap(OutVals[0], OutVals[1]);
852   }
853 
854   bool HasSwiftSelfArg = false;
855   bool HasSwiftErrorArg = false;
856   unsigned NumFixedArgs = 0;
857   for (unsigned I = 0; I < Outs.size(); ++I) {
858     const ISD::OutputArg &Out = Outs[I];
859     SDValue &OutVal = OutVals[I];
860     HasSwiftSelfArg |= Out.Flags.isSwiftSelf();
861     HasSwiftErrorArg |= Out.Flags.isSwiftError();
862     if (Out.Flags.isNest())
863       fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
864     if (Out.Flags.isInAlloca())
865       fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
866     if (Out.Flags.isInConsecutiveRegs())
867       fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
868     if (Out.Flags.isInConsecutiveRegsLast())
869       fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
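    // A byval argument is passed by making a copy of it in a caller-allocated
    // stack object and passing a pointer to that copy instead.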
870     if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
871       auto &MFI = MF.getFrameInfo();
872       int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
873                                      Out.Flags.getNonZeroByValAlign(),
874                                      /*isSS=*/false);
875       SDValue SizeNode =
876           DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
877       SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
878       Chain = DAG.getMemcpy(
879           Chain, DL, FINode, OutVal, SizeNode, Out.Flags.getNonZeroByValAlign(),
880           /*isVolatile*/ false, /*AlwaysInline=*/false,
881           /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
882       OutVal = FINode;
883     }
884     // Count the number of fixed args *after* legalization.
885     NumFixedArgs += Out.IsFixed;
886   }
887 
888   bool IsVarArg = CLI.IsVarArg;
889   auto PtrVT = getPointerTy(Layout);
890 
  // For swiftcc, emit additional swiftself and swifterror arguments if they
  // are not already present. These arguments are also added to the callee's
  // signature; they are necessary to match caller and callee signatures for
  // indirect calls.
895   if (CallConv == CallingConv::Swift) {
896     if (!HasSwiftSelfArg) {
897       NumFixedArgs++;
898       ISD::OutputArg Arg;
899       Arg.Flags.setSwiftSelf();
900       CLI.Outs.push_back(Arg);
901       SDValue ArgVal = DAG.getUNDEF(PtrVT);
902       CLI.OutVals.push_back(ArgVal);
903     }
904     if (!HasSwiftErrorArg) {
905       NumFixedArgs++;
906       ISD::OutputArg Arg;
907       Arg.Flags.setSwiftError();
908       CLI.Outs.push_back(Arg);
909       SDValue ArgVal = DAG.getUNDEF(PtrVT);
910       CLI.OutVals.push_back(ArgVal);
911     }
912   }
913 
914   // Analyze operands of the call, assigning locations to each operand.
915   SmallVector<CCValAssign, 16> ArgLocs;
916   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
917 
918   if (IsVarArg) {
919     // Outgoing non-fixed arguments are placed in a buffer. First
920     // compute their offsets and the total amount of buffer space needed.
921     for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
922       const ISD::OutputArg &Out = Outs[I];
923       SDValue &Arg = OutVals[I];
924       EVT VT = Arg.getValueType();
925       assert(VT != MVT::iPTR && "Legalized args should be concrete");
926       Type *Ty = VT.getTypeForEVT(*DAG.getContext());
927       Align Alignment =
928           std::max(Out.Flags.getNonZeroOrigAlign(), Layout.getABITypeAlign(Ty));
929       unsigned Offset =
930           CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), Alignment);
931       CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
932                                         Offset, VT.getSimpleVT(),
933                                         CCValAssign::Full));
934     }
935   }
936 
937   unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
938 
939   SDValue FINode;
940   if (IsVarArg && NumBytes) {
    // Next, for the non-fixed arguments, emit stores of the argument values
    // into the stack buffer at the offsets computed above.
943     int FI = MF.getFrameInfo().CreateStackObject(NumBytes,
944                                                  Layout.getStackAlignment(),
945                                                  /*isSS=*/false);
946     unsigned ValNo = 0;
947     SmallVector<SDValue, 8> Chains;
948     for (SDValue Arg :
949          make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) {
950       assert(ArgLocs[ValNo].getValNo() == ValNo &&
951              "ArgLocs should remain in order and only hold varargs args");
952       unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
953       FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
954       SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
955                                 DAG.getConstant(Offset, DL, PtrVT));
956       Chains.push_back(
957           DAG.getStore(Chain, DL, Arg, Add,
958                        MachinePointerInfo::getFixedStack(MF, FI, Offset)));
959     }
960     if (!Chains.empty())
961       Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
962   } else if (IsVarArg) {
963     FINode = DAG.getIntPtrConstant(0, DL);
964   }
965 
966   if (Callee->getOpcode() == ISD::GlobalAddress) {
    // If the callee is a GlobalAddress node (quite common, since every direct
    // call is), turn it into a TargetGlobalAddress node so that
    // LowerGlobalAddress doesn't add MO_GOT, which is not needed for direct
    // calls.
    GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Callee);
971     Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
972                                         getPointerTy(DAG.getDataLayout()),
973                                         GA->getOffset());
974     Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
975                          getPointerTy(DAG.getDataLayout()), Callee);
976   }
977 
978   // Compute the operands for the CALLn node.
979   SmallVector<SDValue, 16> Ops;
980   Ops.push_back(Chain);
981   Ops.push_back(Callee);
982 
983   // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
984   // isn't reliable.
985   Ops.append(OutVals.begin(),
986              IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
987   // Add a pointer to the vararg buffer.
988   if (IsVarArg)
989     Ops.push_back(FINode);
990 
991   SmallVector<EVT, 8> InTys;
992   for (const auto &In : Ins) {
993     assert(!In.Flags.isByVal() && "byval is not valid for return values");
994     assert(!In.Flags.isNest() && "nest is not valid for return values");
995     if (In.Flags.isInAlloca())
996       fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
997     if (In.Flags.isInConsecutiveRegs())
998       fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
999     if (In.Flags.isInConsecutiveRegsLast())
1000       fail(DL, DAG,
1001            "WebAssembly hasn't implemented cons regs last return values");
1002     // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1003     // registers.
1004     InTys.push_back(In.VT);
1005   }
1006 
1007   if (CLI.IsTailCall) {
1008     // ret_calls do not return values to the current frame
1009     SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1010     return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
1011   }
1012 
1013   InTys.push_back(MVT::Other);
1014   SDVTList InTyList = DAG.getVTList(InTys);
1015   SDValue Res = DAG.getNode(WebAssemblyISD::CALL, DL, InTyList, Ops);
1016 
1017   for (size_t I = 0; I < Ins.size(); ++I)
1018     InVals.push_back(Res.getValue(I));
1019 
1020   // Return the chain
1021   return Res.getValue(Ins.size());
1022 }
1023 
1024 bool WebAssemblyTargetLowering::CanLowerReturn(
1025     CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
1026     const SmallVectorImpl<ISD::OutputArg> &Outs,
1027     LLVMContext & /*Context*/) const {
1028   // WebAssembly can only handle returning tuples with multivalue enabled
1029   return Subtarget->hasMultivalue() || Outs.size() <= 1;
1030 }
1031 
1032 SDValue WebAssemblyTargetLowering::LowerReturn(
1033     SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
1034     const SmallVectorImpl<ISD::OutputArg> &Outs,
1035     const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
1036     SelectionDAG &DAG) const {
1037   assert((Subtarget->hasMultivalue() || Outs.size() <= 1) &&
1038          "MVP WebAssembly can only return up to one value");
1039   if (!callingConvSupported(CallConv))
1040     fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1041 
1042   SmallVector<SDValue, 4> RetOps(1, Chain);
1043   RetOps.append(OutVals.begin(), OutVals.end());
1044   Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);
1045 
1046   // Record the number and types of the return values.
1047   for (const ISD::OutputArg &Out : Outs) {
1048     assert(!Out.Flags.isByVal() && "byval is not valid for return values");
1049     assert(!Out.Flags.isNest() && "nest is not valid for return values");
1050     assert(Out.IsFixed && "non-fixed return value is not valid");
1051     if (Out.Flags.isInAlloca())
1052       fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
1053     if (Out.Flags.isInConsecutiveRegs())
1054       fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
1055     if (Out.Flags.isInConsecutiveRegsLast())
1056       fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
1057   }
1058 
1059   return Chain;
1060 }
1061 
1062 SDValue WebAssemblyTargetLowering::LowerFormalArguments(
1063     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1064     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1065     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1066   if (!callingConvSupported(CallConv))
1067     fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1068 
1069   MachineFunction &MF = DAG.getMachineFunction();
1070   auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();
1071 
1072   // Set up the incoming ARGUMENTS value, which serves to represent the liveness
1073   // of the incoming values before they're represented by virtual registers.
1074   MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);
1075 
1076   bool HasSwiftErrorArg = false;
1077   bool HasSwiftSelfArg = false;
1078   for (const ISD::InputArg &In : Ins) {
1079     HasSwiftSelfArg |= In.Flags.isSwiftSelf();
1080     HasSwiftErrorArg |= In.Flags.isSwiftError();
1081     if (In.Flags.isInAlloca())
1082       fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
1083     if (In.Flags.isNest())
1084       fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
1085     if (In.Flags.isInConsecutiveRegs())
1086       fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
1087     if (In.Flags.isInConsecutiveRegsLast())
1088       fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
1089     // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1090     // registers.
1091     InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
1092                                            DAG.getTargetConstant(InVals.size(),
1093                                                                  DL, MVT::i32))
1094                              : DAG.getUNDEF(In.VT));
1095 
1096     // Record the number and types of arguments.
1097     MFI->addParam(In.VT);
1098   }
1099 
  // For swiftcc, emit additional swiftself and swifterror arguments if they
  // are not already present. These arguments are also added to the callee's
  // signature; they are necessary to match caller and callee signatures for
  // indirect calls.
1104   auto PtrVT = getPointerTy(MF.getDataLayout());
1105   if (CallConv == CallingConv::Swift) {
1106     if (!HasSwiftSelfArg) {
1107       MFI->addParam(PtrVT);
1108     }
1109     if (!HasSwiftErrorArg) {
1110       MFI->addParam(PtrVT);
1111     }
1112   }
1113   // Varargs are copied into a buffer allocated by the caller, and a pointer to
1114   // the buffer is passed as an argument.
1115   if (IsVarArg) {
1116     MVT PtrVT = getPointerTy(MF.getDataLayout());
1117     Register VarargVreg =
1118         MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrVT));
1119     MFI->setVarargBufferVreg(VarargVreg);
1120     Chain = DAG.getCopyToReg(
1121         Chain, DL, VarargVreg,
1122         DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
1123                     DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
1124     MFI->addParam(PtrVT);
1125   }
1126 
1127   // Record the number and types of arguments and results.
1128   SmallVector<MVT, 4> Params;
1129   SmallVector<MVT, 4> Results;
1130   computeSignatureVTs(MF.getFunction().getFunctionType(), &MF.getFunction(),
1131                       MF.getFunction(), DAG.getTarget(), Params, Results);
1132   for (MVT VT : Results)
1133     MFI->addResult(VT);
1134   // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
  // the param logic here with computeSignatureVTs.
1136   assert(MFI->getParams().size() == Params.size() &&
1137          std::equal(MFI->getParams().begin(), MFI->getParams().end(),
1138                     Params.begin()));
1139 
1140   return Chain;
1141 }
1142 
1143 void WebAssemblyTargetLowering::ReplaceNodeResults(
1144     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
1145   switch (N->getOpcode()) {
1146   case ISD::SIGN_EXTEND_INREG:
1147     // Do not add any results, signifying that N should not be custom lowered
1148     // after all. This happens because simd128 turns on custom lowering for
1149     // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
1150     // illegal type.
1151     break;
1152   default:
1153     llvm_unreachable(
1154         "ReplaceNodeResults not implemented for this op for WebAssembly!");
1155   }
1156 }
1157 
1158 //===----------------------------------------------------------------------===//
1159 //  Custom lowering hooks.
1160 //===----------------------------------------------------------------------===//
1161 
1162 SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
1163                                                   SelectionDAG &DAG) const {
1164   SDLoc DL(Op);
1165   switch (Op.getOpcode()) {
1166   default:
1167     llvm_unreachable("unimplemented operation lowering");
1168     return SDValue();
1169   case ISD::FrameIndex:
1170     return LowerFrameIndex(Op, DAG);
1171   case ISD::GlobalAddress:
1172     return LowerGlobalAddress(Op, DAG);
1173   case ISD::ExternalSymbol:
1174     return LowerExternalSymbol(Op, DAG);
1175   case ISD::JumpTable:
1176     return LowerJumpTable(Op, DAG);
1177   case ISD::BR_JT:
1178     return LowerBR_JT(Op, DAG);
1179   case ISD::VASTART:
1180     return LowerVASTART(Op, DAG);
1181   case ISD::BlockAddress:
1182   case ISD::BRIND:
1183     fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
1184     return SDValue();
1185   case ISD::RETURNADDR:
1186     return LowerRETURNADDR(Op, DAG);
1187   case ISD::FRAMEADDR:
1188     return LowerFRAMEADDR(Op, DAG);
1189   case ISD::CopyToReg:
1190     return LowerCopyToReg(Op, DAG);
1191   case ISD::EXTRACT_VECTOR_ELT:
1192   case ISD::INSERT_VECTOR_ELT:
1193     return LowerAccessVectorElement(Op, DAG);
1194   case ISD::INTRINSIC_VOID:
1195   case ISD::INTRINSIC_WO_CHAIN:
1196   case ISD::INTRINSIC_W_CHAIN:
1197     return LowerIntrinsic(Op, DAG);
1198   case ISD::SIGN_EXTEND_INREG:
1199     return LowerSIGN_EXTEND_INREG(Op, DAG);
1200   case ISD::BUILD_VECTOR:
1201     return LowerBUILD_VECTOR(Op, DAG);
1202   case ISD::VECTOR_SHUFFLE:
1203     return LowerVECTOR_SHUFFLE(Op, DAG);
1204   case ISD::SETCC:
1205     return LowerSETCC(Op, DAG);
1206   case ISD::SHL:
1207   case ISD::SRA:
1208   case ISD::SRL:
1209     return LowerShift(Op, DAG);
1210   }
1211 }
1212 
1213 SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
1214                                                   SelectionDAG &DAG) const {
1215   SDValue Src = Op.getOperand(2);
1216   if (isa<FrameIndexSDNode>(Src.getNode())) {
1217     // CopyToReg nodes don't support FrameIndex operands. Other targets select
1218     // the FI to some LEA-like instruction, but since we don't have that, we
1219     // need to insert some kind of instruction that can take an FI operand and
    // produce a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
1221     // local.copy between Op and its FI operand.
1222     SDValue Chain = Op.getOperand(0);
1223     SDLoc DL(Op);
1224     unsigned Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
1225     EVT VT = Src.getValueType();
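    // Src is a FrameIndex, which is always pointer-sized, so an i32 or i64
    // local.copy is sufficient here.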
1226     SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
1227                                                    : WebAssembly::COPY_I64,
1228                                     DL, VT, Src),
1229                  0);
1230     return Op.getNode()->getNumValues() == 1
1231                ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
1232                : DAG.getCopyToReg(Chain, DL, Reg, Copy,
1233                                   Op.getNumOperands() == 4 ? Op.getOperand(3)
1234                                                            : SDValue());
1235   }
1236   return SDValue();
1237 }
1238 
1239 SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
1240                                                    SelectionDAG &DAG) const {
1241   int FI = cast<FrameIndexSDNode>(Op)->getIndex();
1242   return DAG.getTargetFrameIndex(FI, Op.getValueType());
1243 }
1244 
1245 SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
1246                                                    SelectionDAG &DAG) const {
1247   SDLoc DL(Op);
1248 
1249   if (!Subtarget->getTargetTriple().isOSEmscripten()) {
1250     fail(DL, DAG,
1251          "Non-Emscripten WebAssembly hasn't implemented "
1252          "__builtin_return_address");
1253     return SDValue();
1254   }
1255 
1256   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
1257     return SDValue();
1258 
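  // Lower to a call to the emscripten_return_address helper (registered in
  // the constructor above as the RETURN_ADDRESS libcall), passing the
  // requested depth.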
1259   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1260   MakeLibCallOptions CallOptions;
1261   return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
1262                      {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
1263       .first;
1264 }
1265 
1266 SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
1267                                                   SelectionDAG &DAG) const {
1268   // Non-zero depths are not supported by WebAssembly currently. Use the
1269   // legalizer's default expansion, which is to return 0 (what this function is
1270   // documented to do).
1271   if (Op.getConstantOperandVal(0) > 0)
1272     return SDValue();
1273 
1274   DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
1275   EVT VT = Op.getValueType();
1276   Register FP =
1277       Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
1278   return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
1279 }
1280 
1281 SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
1282                                                       SelectionDAG &DAG) const {
1283   SDLoc DL(Op);
1284   const auto *GA = cast<GlobalAddressSDNode>(Op);
1285   EVT VT = Op.getValueType();
1286   assert(GA->getTargetFlags() == 0 &&
1287          "Unexpected target flags on generic GlobalAddressSDNode");
1288   if (GA->getAddressSpace() != 0)
1289     fail(DL, DAG, "WebAssembly only expects the 0 address space");
1290 
1291   unsigned OperandFlags = 0;
1292   if (isPositionIndependent()) {
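    // In PIC code, DSO-local symbols are addressed relative to __memory_base
    // (or __table_base for functions), while everything else goes through the
    // GOT. A rough sketch of the DSO-local address computed below:
    //   (add (Wrapper __memory_base), (WrapperPIC sym@MBREL))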
1293     const GlobalValue *GV = GA->getGlobal();
1294     if (getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) {
1295       MachineFunction &MF = DAG.getMachineFunction();
1296       MVT PtrVT = getPointerTy(MF.getDataLayout());
1297       const char *BaseName;
1298       if (GV->getValueType()->isFunctionTy()) {
1299         BaseName = MF.createExternalSymbolName("__table_base");
1300         OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL;
1301       } else {
1303         BaseName = MF.createExternalSymbolName("__memory_base");
1304         OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL;
1305       }
1306       SDValue BaseAddr =
1307           DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
1308                       DAG.getTargetExternalSymbol(BaseName, PtrVT));
1309 
1310       SDValue SymAddr = DAG.getNode(
1311           WebAssemblyISD::WrapperPIC, DL, VT,
1312           DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
1313                                      OperandFlags));
1314 
1315       return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
1316     } else {
1317       OperandFlags = WebAssemblyII::MO_GOT;
1318     }
1319   }
1320 
1321   return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
1322                      DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
1323                                                 GA->getOffset(), OperandFlags));
1324 }
1325 
1326 SDValue
1327 WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
1328                                                SelectionDAG &DAG) const {
1329   SDLoc DL(Op);
1330   const auto *ES = cast<ExternalSymbolSDNode>(Op);
1331   EVT VT = Op.getValueType();
1332   assert(ES->getTargetFlags() == 0 &&
1333          "Unexpected target flags on generic ExternalSymbolSDNode");
1334   return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
1335                      DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
1336 }
1337 
1338 SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
1339                                                   SelectionDAG &DAG) const {
1340   // There's no need for a Wrapper node because we always incorporate a jump
1341   // table operand into a BR_TABLE instruction, rather than ever
1342   // materializing it in a register.
1343   const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
1344   return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
1345                                 JT->getTargetFlags());
1346 }
1347 
1348 SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
1349                                               SelectionDAG &DAG) const {
1350   SDLoc DL(Op);
1351   SDValue Chain = Op.getOperand(0);
1352   const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
1353   SDValue Index = Op.getOperand(2);
1354   assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
1355 
1356   SmallVector<SDValue, 8> Ops;
1357   Ops.push_back(Chain);
1358   Ops.push_back(Index);
1359 
1360   MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
1361   const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
1362 
1363   // Add an operand for each case.
1364   for (auto MBB : MBBs)
1365     Ops.push_back(DAG.getBasicBlock(MBB));
1366 
1367   // Add the first MBB as a dummy default target for now. This will be replaced
1368   // with the proper default target (and the preceding range check eliminated)
1369   // if possible by WebAssemblyFixBrTableDefaults.
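  // The node built here therefore has the rough shape
  //   (BR_TABLE chain, index, bb_case0, ..., bb_caseN, bb_default)
  // with bb_default temporarily aliasing the first case block.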
1370   Ops.push_back(DAG.getBasicBlock(*MBBs.begin()));
1371   return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
1372 }
1373 
1374 SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
1375                                                 SelectionDAG &DAG) const {
1376   SDLoc DL(Op);
1377   EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());
1378 
1379   auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
1380   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1381 
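  // va_start on WebAssembly just records where the varargs were spilled: store
  // the address of the vararg buffer (set up when the formal arguments were
  // lowered) into the va_list object, roughly *(i8 **)va_list = vararg_buffer.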
1382   SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
1383                                     MFI->getVarargBufferVreg(), PtrVT);
1384   return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
1385                       MachinePointerInfo(SV));
1386 }
1387 
1388 SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
1389                                                   SelectionDAG &DAG) const {
1390   MachineFunction &MF = DAG.getMachineFunction();
1391   unsigned IntNo;
1392   switch (Op.getOpcode()) {
1393   case ISD::INTRINSIC_VOID:
1394   case ISD::INTRINSIC_W_CHAIN:
1395     IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1396     break;
1397   case ISD::INTRINSIC_WO_CHAIN:
1398     IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1399     break;
1400   default:
1401     llvm_unreachable("Invalid intrinsic");
1402   }
1403   SDLoc DL(Op);
1404 
1405   switch (IntNo) {
1406   default:
1407     return SDValue(); // Don't custom lower most intrinsics.
1408 
1409   case Intrinsic::wasm_lsda: {
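    // Return the address of this function's language-specific data area, i.e.
    // the GCC_except_table<function number> symbol constructed below.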
1410     EVT VT = Op.getValueType();
1411     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1412     MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
1413     auto &Context = MF.getMMI().getContext();
1414     MCSymbol *S = Context.getOrCreateSymbol(Twine("GCC_except_table") +
1415                                             Twine(MF.getFunctionNumber()));
1416     return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
1417                        DAG.getMCSymbol(S, PtrVT));
1418   }
1419 
1420   case Intrinsic::wasm_throw: {
1421     // We only support C++ exceptions for now
1422     int Tag = cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1423     if (Tag != CPP_EXCEPTION)
1424       llvm_unreachable("Invalid tag!");
1425     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1426     MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
1427     const char *SymName = MF.createExternalSymbolName("__cpp_exception");
1428     SDValue SymNode = DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
1429                                   DAG.getTargetExternalSymbol(SymName, PtrVT));
1430     return DAG.getNode(WebAssemblyISD::THROW, DL,
1431                        MVT::Other, // outchain type
1432                        {
1433                            Op.getOperand(0), // inchain
1434                            SymNode,          // exception symbol
1435                            Op.getOperand(3)  // thrown value
1436                        });
1437   }
1438 
1439   case Intrinsic::wasm_shuffle: {
1440     // Drop the intrinsic ID operand and replace undef mask indices with zero
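    // For example (a sketch), a call such as
    //   @llvm.wasm.shuffle(v1, v2, 0, 1, ..., 15)
    // becomes (SHUFFLE v1, v2, 0, 1, ..., 15), with any undef or out-of-range
    // (>= 32) lane index normalized to 0; everything else passes through
    // unchanged.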
1441     SDValue Ops[18];
1442     size_t OpIdx = 0;
1443     Ops[OpIdx++] = Op.getOperand(1);
1444     Ops[OpIdx++] = Op.getOperand(2);
1445     while (OpIdx < 18) {
1446       const SDValue &MaskIdx = Op.getOperand(OpIdx + 1);
1447       if (MaskIdx.isUndef() ||
1448           cast<ConstantSDNode>(MaskIdx.getNode())->getZExtValue() >= 32) {
1449         Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32);
1450       } else {
1451         Ops[OpIdx++] = MaskIdx;
1452       }
1453     }
1454     return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
1455   }
1456   }
1457 }
1458 
1459 SDValue
1460 WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
1461                                                   SelectionDAG &DAG) const {
1462   SDLoc DL(Op);
1463   // If sign extension operations are disabled, allow sext_inreg only if the
1464   // operand is a vector extract of an i8 or i16 lane. SIMD does not depend on
1465   // sign extension operations, but allowing sext_inreg in this context lets
1466   // us have simple patterns to select extract_lane_s instructions. Expanding
1467   // sext_inreg everywhere would be simpler in this file, but would necessitate
1468   // large and brittle patterns to undo the expansion and select
1469   // extract_lane_s instructions.
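  // For example (a sketch):
  //   (sext_inreg (extract_vector_elt (v4i32 $v), 1), i8)
  // is rewritten below so the extract is performed on a v16i8 bitcast of $v at
  // lane 4, which matches the pattern for i8x16.extract_lane_s.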
1470   assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
1471   if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
1472     return SDValue();
1473 
1474   const SDValue &Extract = Op.getOperand(0);
1475   MVT VecT = Extract.getOperand(0).getSimpleValueType();
1476   if (VecT.getVectorElementType().getSizeInBits() > 32)
1477     return SDValue();
1478   MVT ExtractedLaneT =
1479       cast<VTSDNode>(Op.getOperand(1).getNode())->getVT().getSimpleVT();
1480   MVT ExtractedVecT =
1481       MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
1482   if (ExtractedVecT == VecT)
1483     return Op;
1484 
1485   // Bitcast vector to appropriate type to ensure ISel pattern coverage
1486   const SDNode *Index = Extract.getOperand(1).getNode();
1487   if (!isa<ConstantSDNode>(Index))
1488     return SDValue();
1489   unsigned IndexVal = cast<ConstantSDNode>(Index)->getZExtValue();
1490   unsigned Scale =
1491       ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
1492   assert(Scale > 1);
1493   SDValue NewIndex =
1494       DAG.getConstant(IndexVal * Scale, DL, Index->getValueType(0));
1495   SDValue NewExtract = DAG.getNode(
1496       ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(),
1497       DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
1498   return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract,
1499                      Op.getOperand(1));
1500 }
1501 
1502 SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
1503                                                      SelectionDAG &DAG) const {
1504   SDLoc DL(Op);
1505   const EVT VecT = Op.getValueType();
1506   const EVT LaneT = Op.getOperand(0).getValueType();
1507   const size_t Lanes = Op.getNumOperands();
1508   bool CanSwizzle = VecT == MVT::v16i8;
1509 
1510   // BUILD_VECTORs are lowered to the instruction that initializes the highest
1511   // possible number of lanes at once, followed by a sequence of replace_lane
1512   // instructions to individually initialize any remaining lanes.
1513 
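  // For example (a sketch), a v4i32 build_vector whose (non-constant) lanes
  // are (x, x, x, y) becomes an i32x4.splat of x followed by a single
  // replace_lane of lane 3 with y.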
1514   // TODO: Tune this. For example, lanewise swizzling is very expensive, so
1515   // swizzled lanes should be given greater weight.
1516 
1517   // TODO: Investigate building vectors by shuffling together vectors built by
1518   // separately specialized means.
1519 
1520   auto IsConstant = [](const SDValue &V) {
1521     return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
1522   };
1523 
1524   // Returns the source vector and index vector pair if they exist. Checks for:
1525   //   (extract_vector_elt
1526   //     $src,
1527   //     (sign_extend_inreg (extract_vector_elt $indices, $i))
1528   //   )
1529   auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
1530     auto Bail = std::make_pair(SDValue(), SDValue());
1531     if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
1532       return Bail;
1533     const SDValue &SwizzleSrc = Lane->getOperand(0);
1534     const SDValue &IndexExt = Lane->getOperand(1);
1535     if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
1536       return Bail;
1537     const SDValue &Index = IndexExt->getOperand(0);
1538     if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
1539       return Bail;
1540     const SDValue &SwizzleIndices = Index->getOperand(0);
1541     if (SwizzleSrc.getValueType() != MVT::v16i8 ||
1542         SwizzleIndices.getValueType() != MVT::v16i8 ||
1543         Index->getOperand(1)->getOpcode() != ISD::Constant ||
1544         Index->getConstantOperandVal(1) != I)
1545       return Bail;
1546     return std::make_pair(SwizzleSrc, SwizzleIndices);
1547   };
1548 
1549   using ValueEntry = std::pair<SDValue, size_t>;
1550   SmallVector<ValueEntry, 16> SplatValueCounts;
1551 
1552   using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
1553   SmallVector<SwizzleEntry, 16> SwizzleCounts;
1554 
1555   auto AddCount = [](auto &Counts, const auto &Val) {
1556     auto CountIt = std::find_if(Counts.begin(), Counts.end(),
1557                                 [&Val](auto E) { return E.first == Val; });
1558     if (CountIt == Counts.end()) {
1559       Counts.emplace_back(Val, 1);
1560     } else {
1561       CountIt->second++;
1562     }
1563   };
1564 
1565   auto GetMostCommon = [](auto &Counts) {
1566     auto CommonIt =
1567         std::max_element(Counts.begin(), Counts.end(),
1568                          [](auto A, auto B) { return A.second < B.second; });
1569     assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
1570     return *CommonIt;
1571   };
1572 
1573   size_t NumConstantLanes = 0;
1574 
1575   // Count eligible lanes for each type of vector creation op
1576   for (size_t I = 0; I < Lanes; ++I) {
1577     const SDValue &Lane = Op->getOperand(I);
1578     if (Lane.isUndef())
1579       continue;
1580 
1581     AddCount(SplatValueCounts, Lane);
1582 
1583     if (IsConstant(Lane)) {
1584       NumConstantLanes++;
1585     } else if (CanSwizzle) {
1586       auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
1587       if (SwizzleSrcs.first)
1588         AddCount(SwizzleCounts, SwizzleSrcs);
1589     }
1590   }
1591 
1592   SDValue SplatValue;
1593   size_t NumSplatLanes;
1594   std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);
1595 
1596   SDValue SwizzleSrc;
1597   SDValue SwizzleIndices;
1598   size_t NumSwizzleLanes = 0;
1599   if (SwizzleCounts.size())
1600     std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
1601                           NumSwizzleLanes) = GetMostCommon(SwizzleCounts);
1602 
1603   // Predicate returning true if the lane is already properly initialized by
1604   // the initial vector-creating node, so that no replace_lane is needed for it
1605   std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
1606   SDValue Result;
1607   // Prefer swizzles over vector consts over splats
1608   if (NumSwizzleLanes >= NumSplatLanes &&
1609       (!Subtarget->hasUnimplementedSIMD128() ||
1610        NumSwizzleLanes >= NumConstantLanes)) {
1611     Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
1612                          SwizzleIndices);
1613     auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
1614     IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
1615       return Swizzled == GetSwizzleSrcs(I, Lane);
1616     };
1617   } else if (NumConstantLanes >= NumSplatLanes &&
1618              Subtarget->hasUnimplementedSIMD128()) {
1619     // If we support v128.const, emit it directly
1620     SmallVector<SDValue, 16> ConstLanes;
1621     for (const SDValue &Lane : Op->op_values()) {
1622       if (IsConstant(Lane)) {
1623         ConstLanes.push_back(Lane);
1624       } else if (LaneT.isFloatingPoint()) {
1625         ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
1626       } else {
1627         ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
1628       }
1629     }
1630     Result = DAG.getBuildVector(VecT, DL, ConstLanes);
1631     IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
1632       return IsConstant(Lane);
1633     };
1634   } else if (NumConstantLanes >= NumSplatLanes && VecT.isInteger()) {
1635     // Otherwise, if this is an integer vector, pack the lane values together so
1636     // we can construct the 128-bit constant from a pair of i64s using a splat
1637     // followed by at most one i64x2.replace_lane. Also keep track of the lanes
1638     // that actually matter so we can avoid the replace_lane in more cases.
1639     std::array<uint64_t, 2> I64s{{0, 0}};
1640     std::array<uint64_t, 2> ConstLaneMasks{{0, 0}};
1641     size_t LaneBits = 128 / Lanes;
1642     size_t HalfLanes = Lanes / 2;
1643     for (size_t I = 0; I < Lanes; ++I) {
1644       const SDValue &Lane = Op.getOperand(I);
1645       if (IsConstant(Lane)) {
1646         // How much we need to shift Val to position it in an i64
1647         auto Shift = LaneBits * (I % HalfLanes);
1648         auto Mask = maskTrailingOnes<uint64_t>(LaneBits);
1649         auto Val = cast<ConstantSDNode>(Lane.getNode())->getZExtValue() & Mask;
1650         I64s[I / HalfLanes] |= Val << Shift;
1651         ConstLaneMasks[I / HalfLanes] |= Mask << Shift;
1652       }
1653     }
1654     // Check whether all constant lanes in the second half of the vector are
1655     // equivalent in the first half or vice versa to determine whether splatting
1656     // either side will be sufficient to materialize the constant. As a special
1657     // case, if the first and second halves have no constant lanes in common, we
1658     // can just combine them.
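    // A couple of worked examples (sketches): for a v2i64 build_vector of
    // <42, 42> both halves agree, so a single i64x2.splat of 42 suffices; for
    // <1, 2> neither half nor their combination is sufficient, so lane 1 is
    // patched afterwards with an i64x2.replace_lane.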
1659     bool FirstHalfSufficient = (I64s[0] & ConstLaneMasks[1]) == I64s[1];
1660     bool SecondHalfSufficient = (I64s[1] & ConstLaneMasks[0]) == I64s[0];
1661     bool CombinedSufficient = (ConstLaneMasks[0] & ConstLaneMasks[1]) == 0;
1662 
1663     uint64_t Splatted;
1664     if (SecondHalfSufficient) {
1665       Splatted = I64s[1];
1666     } else if (CombinedSufficient) {
1667       Splatted = I64s[0] | I64s[1];
1668     } else {
1669       Splatted = I64s[0];
1670     }
1671 
1672     Result = DAG.getSplatBuildVector(MVT::v2i64, DL,
1673                                      DAG.getConstant(Splatted, DL, MVT::i64));
1674     if (!FirstHalfSufficient && !SecondHalfSufficient && !CombinedSufficient) {
1675       Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i64, Result,
1676                            DAG.getConstant(I64s[1], DL, MVT::i64),
1677                            DAG.getConstant(1, DL, MVT::i32));
1678     }
1679     Result = DAG.getBitcast(VecT, Result);
1680     IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
1681       return IsConstant(Lane);
1682     };
1683   } else {
1684     // Use a splat, but possibly a load_splat
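    // For example (a sketch), splatting a freshly loaded i8 into a v16i8 is
    // emitted as a single LOAD_SPLAT memory node (selected later as a
    // load-and-splat instruction) rather than a scalar load followed by a
    // splat.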
1685     LoadSDNode *SplattedLoad;
1686     if ((SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) &&
1687         SplattedLoad->getMemoryVT() == VecT.getVectorElementType()) {
1688       Result = DAG.getMemIntrinsicNode(
1689           WebAssemblyISD::LOAD_SPLAT, DL, DAG.getVTList(VecT),
1690           {SplattedLoad->getChain(), SplattedLoad->getBasePtr(),
1691            SplattedLoad->getOffset()},
1692           SplattedLoad->getMemoryVT(), SplattedLoad->getMemOperand());
1693     } else {
1694       Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
1695     }
1696     IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
1697       return Lane == SplatValue;
1698     };
1699   }
1700 
1701   assert(Result);
1702   assert(IsLaneConstructed);
1703 
1704   // Add replace_lane instructions for any unhandled values
1705   for (size_t I = 0; I < Lanes; ++I) {
1706     const SDValue &Lane = Op->getOperand(I);
1707     if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
1708       Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
1709                            DAG.getConstant(I, DL, MVT::i32));
1710   }
1711 
1712   return Result;
1713 }
1714 
1715 SDValue
1716 WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
1717                                                SelectionDAG &DAG) const {
1718   SDLoc DL(Op);
1719   ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
1720   MVT VecType = Op.getOperand(0).getSimpleValueType();
1721   assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
1722   size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;
1723 
1724   // Space for two vector args and sixteen mask indices
1725   SDValue Ops[18];
1726   size_t OpIdx = 0;
1727   Ops[OpIdx++] = Op.getOperand(0);
1728   Ops[OpIdx++] = Op.getOperand(1);
1729 
1730   // Expand mask indices to byte indices and materialize them as operands
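  // For example (a sketch), a v4i32 shuffle index of 5 expands to the byte
  // indices 20, 21, 22, 23 expected by the byte-wise shuffle instruction.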
1731   for (int M : Mask) {
1732     for (size_t J = 0; J < LaneBytes; ++J) {
1733       // Lower undefs (represented by -1 in mask) to zero
1734       uint64_t ByteIndex = M == -1 ? 0 : (uint64_t)M * LaneBytes + J;
1735       Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
1736     }
1737   }
1738 
1739   return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
1740 }
1741 
1742 SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
1743                                               SelectionDAG &DAG) const {
1744   SDLoc DL(Op);
1745   // The legalizer does not know how to expand SETCC on i64x2 vectors because
1746   // the target has no i64x2 comparison instructions. We could solve this by
1747   // expanding all i64x2 SETCC nodes, but that seems to expand f64x2 SETCC nodes
1748   // (which return i64x2 results) as well. So instead we manually unroll i64x2
1749   // comparisons here.
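  // Each lane is computed as a scalar select_cc yielding all-ones or
  // all-zeros, roughly:
  //   lane[i] = (select_cc lhs[i], rhs[i], -1, 0, cc)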
1750   assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
1751   SmallVector<SDValue, 2> LHS, RHS;
1752   DAG.ExtractVectorElements(Op->getOperand(0), LHS);
1753   DAG.ExtractVectorElements(Op->getOperand(1), RHS);
1754   const SDValue &CC = Op->getOperand(2);
1755   auto MakeLane = [&](unsigned I) {
1756     return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I],
1757                        DAG.getConstant(uint64_t(-1), DL, MVT::i64),
1758                        DAG.getConstant(uint64_t(0), DL, MVT::i64), CC);
1759   };
1760   return DAG.getBuildVector(Op->getValueType(0), DL,
1761                             {MakeLane(0), MakeLane(1)});
1762 }
1763 
1764 SDValue
1765 WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
1766                                                     SelectionDAG &DAG) const {
1767   // Allow constant lane indices, expand variable lane indices
1768   SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
1769   if (isa<ConstantSDNode>(IdxNode) || IdxNode->isUndef())
1770     return Op;
1771   else
1772     // Perform default expansion
1773     return SDValue();
1774 }
1775 
1776 static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
1777   EVT LaneT = Op.getSimpleValueType().getVectorElementType();
1778   // 32-bit and 64-bit unrolled shifts will have proper semantics
1779   if (LaneT.bitsGE(MVT::i32))
1780     return DAG.UnrollVectorOp(Op.getNode());
1781   // Otherwise mask the shift value to get proper semantics from a 32-bit shift
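  // For example (a sketch), each lane of an unrolled v16i8 shift becomes
  //   (shl/sra/srl x[i], (and s[i], 7))
  // computed on i32, with x[i] first sign-extended in-register for sra.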
1782   SDLoc DL(Op);
1783   size_t NumLanes = Op.getSimpleValueType().getVectorNumElements();
1784   SDValue Mask = DAG.getConstant(LaneT.getSizeInBits() - 1, DL, MVT::i32);
1785   unsigned ShiftOpcode = Op.getOpcode();
1786   SmallVector<SDValue, 16> ShiftedElements;
1787   DAG.ExtractVectorElements(Op.getOperand(0), ShiftedElements, 0, 0, MVT::i32);
1788   SmallVector<SDValue, 16> ShiftElements;
1789   DAG.ExtractVectorElements(Op.getOperand(1), ShiftElements, 0, 0, MVT::i32);
1790   SmallVector<SDValue, 16> UnrolledOps;
1791   for (size_t i = 0; i < NumLanes; ++i) {
1792     SDValue MaskedShiftValue =
1793         DAG.getNode(ISD::AND, DL, MVT::i32, ShiftElements[i], Mask);
1794     SDValue ShiftedValue = ShiftedElements[i];
1795     if (ShiftOpcode == ISD::SRA)
1796       ShiftedValue = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32,
1797                                  ShiftedValue, DAG.getValueType(LaneT));
1798     UnrolledOps.push_back(
1799         DAG.getNode(ShiftOpcode, DL, MVT::i32, ShiftedValue, MaskedShiftValue));
1800   }
1801   return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps);
1802 }
1803 
1804 SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
1805                                               SelectionDAG &DAG) const {
1806   SDLoc DL(Op);
1807 
1808   // Only manually lower vector shifts
1809   assert(Op.getSimpleValueType().isVector());
1810 
1811   auto ShiftVal = DAG.getSplatValue(Op.getOperand(1));
1812   if (!ShiftVal)
1813     return unrollVectorShift(Op, DAG);
1814 
1815   // Use anyext because none of the high bits can affect the shift
1816   ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32);
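  // The splatted amount then feeds the target shift node directly, e.g.
  // (shl v, (splat n)) becomes (VEC_SHL v, n) (a sketch).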
1817 
1818   unsigned Opcode;
1819   switch (Op.getOpcode()) {
1820   case ISD::SHL:
1821     Opcode = WebAssemblyISD::VEC_SHL;
1822     break;
1823   case ISD::SRA:
1824     Opcode = WebAssemblyISD::VEC_SHR_S;
1825     break;
1826   case ISD::SRL:
1827     Opcode = WebAssemblyISD::VEC_SHR_U;
1828     break;
1829   default:
1830     llvm_unreachable("unexpected opcode");
1831   }
1832 
1833   return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal);
1834 }
1835 
1836 //===----------------------------------------------------------------------===//
1837 //   Custom DAG combine hooks
1838 //===----------------------------------------------------------------------===//
1839 static SDValue
1840 performVECTOR_SHUFFLECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
1841   auto &DAG = DCI.DAG;
1842   auto Shuffle = cast<ShuffleVectorSDNode>(N);
1843 
1844   // Hoist vector bitcasts that don't change the number of lanes out of unary
1845   // shuffles, where they are less likely to get in the way of other combines.
1846   // (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) ->
1847   //  (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask))))
1848   SDValue Bitcast = N->getOperand(0);
1849   if (Bitcast.getOpcode() != ISD::BITCAST)
1850     return SDValue();
1851   if (!N->getOperand(1).isUndef())
1852     return SDValue();
1853   SDValue CastOp = Bitcast.getOperand(0);
1854   MVT SrcType = CastOp.getSimpleValueType();
1855   MVT DstType = Bitcast.getSimpleValueType();
1856   if (!SrcType.is128BitVector() ||
1857       SrcType.getVectorNumElements() != DstType.getVectorNumElements())
1858     return SDValue();
1859   SDValue NewShuffle = DAG.getVectorShuffle(
1860       SrcType, SDLoc(N), CastOp, DAG.getUNDEF(SrcType), Shuffle->getMask());
1861   return DAG.getBitcast(DstType, NewShuffle);
1862 }
1863 
1864 static SDValue performVectorWidenCombine(SDNode *N,
1865                                          TargetLowering::DAGCombinerInfo &DCI) {
1866   auto &DAG = DCI.DAG;
1867   assert(N->getOpcode() == ISD::SIGN_EXTEND ||
1868          N->getOpcode() == ISD::ZERO_EXTEND);
1869 
1870   // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if
1871   // possible before the extract_subvector can be expanded.
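  // For example (a sketch):
  //   (v8i16 (sign_extend (v8i8 (extract_subvector (v16i8 $x), 8))))
  // becomes (WIDEN_HIGH_S $x).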
1872   auto Extract = N->getOperand(0);
1873   if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
1874     return SDValue();
1875   auto Source = Extract.getOperand(0);
1876   auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
1877   if (IndexNode == nullptr)
1878     return SDValue();
1879   auto Index = IndexNode->getZExtValue();
1880 
1881   // Only v8i8 and v4i16 extracts can be widened, and only if the extracted
1882   // subvector is the low or high half of its source.
1883   EVT ResVT = N->getValueType(0);
1884   if (ResVT == MVT::v8i16) {
1885     if (Extract.getValueType() != MVT::v8i8 ||
1886         Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8))
1887       return SDValue();
1888   } else if (ResVT == MVT::v4i32) {
1889     if (Extract.getValueType() != MVT::v4i16 ||
1890         Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4))
1891       return SDValue();
1892   } else {
1893     return SDValue();
1894   }
1895 
1896   bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND;
1897   bool IsLow = Index == 0;
1898 
1899   unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::WIDEN_LOW_S
1900                                 : WebAssemblyISD::WIDEN_HIGH_S)
1901                        : (IsLow ? WebAssemblyISD::WIDEN_LOW_U
1902                                 : WebAssemblyISD::WIDEN_HIGH_U);
1903 
1904   return DAG.getNode(Op, SDLoc(N), ResVT, Source);
1905 }
1906 
1907 SDValue
1908 WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
1909                                              DAGCombinerInfo &DCI) const {
1910   switch (N->getOpcode()) {
1911   default:
1912     return SDValue();
1913   case ISD::VECTOR_SHUFFLE:
1914     return performVECTOR_SHUFFLECombine(N, DCI);
1915   case ISD::SIGN_EXTEND:
1916   case ISD::ZERO_EXTEND:
1917     return performVectorWidenCombine(N, DCI);
1918   }
1919 }
1920