1 //=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements the WebAssemblyTargetLowering class.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "WebAssemblyISelLowering.h"
15 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
16 #include "WebAssemblyMachineFunctionInfo.h"
17 #include "WebAssemblySubtarget.h"
18 #include "WebAssemblyTargetMachine.h"
19 #include "llvm/CodeGen/Analysis.h"
20 #include "llvm/CodeGen/CallingConvLower.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/CodeGen/MachineJumpTableInfo.h"
23 #include "llvm/CodeGen/MachineModuleInfo.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/CodeGen/SelectionDAG.h"
26 #include "llvm/CodeGen/WasmEHFuncInfo.h"
27 #include "llvm/IR/DiagnosticInfo.h"
28 #include "llvm/IR/DiagnosticPrinter.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/Intrinsics.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/ErrorHandling.h"
33 #include "llvm/Support/raw_ostream.h"
34 #include "llvm/Target/TargetOptions.h"
35 using namespace llvm;
36 
37 #define DEBUG_TYPE "wasm-lower"
38 
39 WebAssemblyTargetLowering::WebAssemblyTargetLowering(
40     const TargetMachine &TM, const WebAssemblySubtarget &STI)
41     : TargetLowering(TM), Subtarget(&STI) {
42   auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;
43 
44   // Booleans always contain 0 or 1.
45   setBooleanContents(ZeroOrOneBooleanContent);
  // Except in SIMD vectors, where booleans are all-zeros or all-ones.
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
48   // WebAssembly does not produce floating-point exceptions on normal floating
49   // point operations.
50   setHasFloatingPointExceptions(false);
51   // We don't know the microarchitecture here, so just reduce register pressure.
52   setSchedulingPreference(Sched::RegPressure);
53   // Tell ISel that we have a stack pointer.
54   setStackPointerRegisterToSaveRestore(
55       Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
56   // Set up the register classes.
57   addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
58   addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
59   addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
60   addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
61   if (Subtarget->hasSIMD128()) {
62     addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
63     addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
64     addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
65     addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
66     if (Subtarget->hasUnimplementedSIMD128()) {
67       addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
68       addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
69     }
70   }
71   // Compute derived properties from the register classes.
72   computeRegisterProperties(Subtarget->getRegisterInfo());
73 
74   setOperationAction(ISD::GlobalAddress, MVTPtr, Custom);
75   setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom);
76   setOperationAction(ISD::JumpTable, MVTPtr, Custom);
77   setOperationAction(ISD::BlockAddress, MVTPtr, Custom);
78   setOperationAction(ISD::BRIND, MVT::Other, Custom);
79 
  // Take the default expansion for va_arg, va_copy, and va_end. There is no
  // default action for va_start, so we lower it with custom code.
82   setOperationAction(ISD::VASTART, MVT::Other, Custom);
83   setOperationAction(ISD::VAARG, MVT::Other, Expand);
84   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
85   setOperationAction(ISD::VAEND, MVT::Other, Expand);
86 
87   for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
88     // Don't expand the floating-point types to constant pools.
89     setOperationAction(ISD::ConstantFP, T, Legal);
90     // Expand floating-point comparisons.
91     for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
92                     ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
93       setCondCodeAction(CC, T, Expand);
94     // Expand floating-point library function operators.
95     for (auto Op :
96          {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FMA})
97       setOperationAction(Op, T, Expand);
    // Mark supported floating-point library function operators as Legal; they
    // otherwise default to Expand.
100     for (auto Op :
101          {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT})
102       setOperationAction(Op, T, Legal);
103     // Support minimum and maximum, which otherwise default to expand.
104     setOperationAction(ISD::FMINIMUM, T, Legal);
105     setOperationAction(ISD::FMAXIMUM, T, Legal);
106     // WebAssembly currently has no builtin f16 support.
107     setOperationAction(ISD::FP16_TO_FP, T, Expand);
108     setOperationAction(ISD::FP_TO_FP16, T, Expand);
109     setLoadExtAction(ISD::EXTLOAD, T, MVT::f16, Expand);
110     setTruncStoreAction(T, MVT::f16, Expand);
111   }
112 
113   // Support saturating add for i8x16 and i16x8
114   if (Subtarget->hasSIMD128())
115     for (auto T : {MVT::v16i8, MVT::v8i16})
116       for (auto Op : {ISD::SADDSAT, ISD::UADDSAT})
117         setOperationAction(Op, T, Legal);
118 
119   // Expand unavailable integer operations.
120   for (auto Op :
121        {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU,
122         ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS, ISD::SRA_PARTS,
123         ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}) {
124     for (auto T : {MVT::i32, MVT::i64}) {
125       setOperationAction(Op, T, Expand);
126     }
127     if (Subtarget->hasSIMD128()) {
128       for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) {
129         setOperationAction(Op, T, Expand);
130       }
131       if (Subtarget->hasUnimplementedSIMD128()) {
132         setOperationAction(Op, MVT::v2i64, Expand);
133       }
134     }
135   }
136 
137   // There is no i64x2.mul instruction
138   setOperationAction(ISD::MUL, MVT::v2i64, Expand);
139 
140   // We have custom shuffle lowering to expose the shuffle mask
141   if (Subtarget->hasSIMD128()) {
142     for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) {
143       setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);
144     }
145     if (Subtarget->hasUnimplementedSIMD128()) {
146       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
147       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
148     }
149   }
150 
151   // Custom lowering since wasm shifts must have a scalar shift amount
152   if (Subtarget->hasSIMD128()) {
153     for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
154       for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
155         setOperationAction(Op, T, Custom);
156     if (Subtarget->hasUnimplementedSIMD128())
157       for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
158         setOperationAction(Op, MVT::v2i64, Custom);
159   }
160 
161   // There are no select instructions for vectors
162   if (Subtarget->hasSIMD128())
163     for (auto Op : {ISD::VSELECT, ISD::SELECT_CC, ISD::SELECT}) {
164       for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
165         setOperationAction(Op, T, Expand);
166       if (Subtarget->hasUnimplementedSIMD128())
167         for (auto T : {MVT::v2i64, MVT::v2f64})
168           setOperationAction(Op, T, Expand);
169     }
170 
171   // As a special case, these operators use the type to mean the type to
172   // sign-extend from.
173   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
174   if (!Subtarget->hasSignExt()) {
175     // Sign extends are legal only when extending a vector extract
176     auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
177     for (auto T : {MVT::i8, MVT::i16, MVT::i32})
178       setOperationAction(ISD::SIGN_EXTEND_INREG, T, Action);
179   }
180   for (auto T : MVT::integer_vector_valuetypes())
181     setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand);
182 
183   // Dynamic stack allocation: use the default expansion.
184   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
185   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
186   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand);
187 
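  // Custom-lower FrameIndex and CopyToReg; see LowerFrameIndex and
  // LowerCopyToReg below.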
188   setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
189   setOperationAction(ISD::CopyToReg, MVT::Other, Custom);
190 
191   // Expand these forms; we pattern-match the forms that we can handle in isel.
192   for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
193     for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
194       setOperationAction(Op, T, Expand);
195 
196   // We have custom switch handling.
197   setOperationAction(ISD::BR_JT, MVT::Other, Custom);
198 
199   // WebAssembly doesn't have:
200   //  - Floating-point extending loads.
201   //  - Floating-point truncating stores.
202   //  - i1 extending loads.
203   //  - extending/truncating SIMD loads/stores
204   setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
205   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
206   for (auto T : MVT::integer_valuetypes())
207     for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
208       setLoadExtAction(Ext, T, MVT::i1, Promote);
209   if (Subtarget->hasSIMD128()) {
210     for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
211                    MVT::v2f64}) {
212       for (auto MemT : MVT::vector_valuetypes()) {
213         if (MVT(T) != MemT) {
214           setTruncStoreAction(T, MemT, Expand);
215           for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
216             setLoadExtAction(Ext, T, MemT, Expand);
217         }
218       }
219     }
220   }
221 
222   // Expand additional SIMD ops that V8 hasn't implemented yet
223   if (Subtarget->hasSIMD128() && !Subtarget->hasUnimplementedSIMD128()) {
224     setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
225     setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
226   }
227 
228   // Custom lower lane accesses to expand out variable indices
229   if (Subtarget->hasSIMD128()) {
230     for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) {
231       setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom);
232       setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom);
233     }
234     if (Subtarget->hasUnimplementedSIMD128()) {
235       for (auto T : {MVT::v2i64, MVT::v2f64}) {
236         setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom);
237         setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom);
238       }
239     }
240   }
241 
242   // Trap lowers to wasm unreachable
243   setOperationAction(ISD::TRAP, MVT::Other, Legal);
244 
245   // Exception handling intrinsics
246   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
247   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
248 
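  // Atomic accesses wider than 64 bits are not supported natively and are
  // expanded to library calls.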
249   setMaxAtomicSizeInBitsSupported(64);
250 }
251 
252 TargetLowering::AtomicExpansionKind
253 WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
254   // We have wasm instructions for these
255   switch (AI->getOperation()) {
256   case AtomicRMWInst::Add:
257   case AtomicRMWInst::Sub:
258   case AtomicRMWInst::And:
259   case AtomicRMWInst::Or:
260   case AtomicRMWInst::Xor:
261   case AtomicRMWInst::Xchg:
262     return AtomicExpansionKind::None;
263   default:
264     break;
265   }
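  // Everything else (e.g. Nand, Min, Max) is expanded to a cmpxchg loop.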
266   return AtomicExpansionKind::CmpXChg;
267 }
268 
269 FastISel *WebAssemblyTargetLowering::createFastISel(
270     FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
271   return WebAssembly::createFastISel(FuncInfo, LibInfo);
272 }
273 
274 bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
275     const GlobalAddressSDNode * /*GA*/) const {
276   // All offsets can be folded.
277   return true;
278 }
279 
280 MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
281                                                       EVT VT) const {
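  // Round the shifted type's bit width up to the next power of two (with a
  // minimum of 8) so the shift amount gets a simple integer type.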
282   unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
283   if (BitWidth > 1 && BitWidth < 8)
284     BitWidth = 8;
285 
286   if (BitWidth > 64) {
287     // The shift will be lowered to a libcall, and compiler-rt libcalls expect
288     // the count to be an i32.
289     BitWidth = 32;
290     assert(BitWidth >= Log2_32_Ceil(VT.getSizeInBits()) &&
291            "32-bit shift counts ought to be enough for anyone");
292   }
293 
294   MVT Result = MVT::getIntegerVT(BitWidth);
295   assert(Result != MVT::INVALID_SIMPLE_VALUE_TYPE &&
296          "Unable to represent scalar shift amount type");
297   return Result;
298 }
299 
300 // Lower an fp-to-int conversion operator from the LLVM opcode, which has an
301 // undefined result on invalid/overflow, to the WebAssembly opcode, which
302 // traps on invalid/overflow.
303 static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
304                                        MachineBasicBlock *BB,
305                                        const TargetInstrInfo &TII,
306                                        bool IsUnsigned, bool Int64,
307                                        bool Float64, unsigned LoweredOpcode) {
308   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
309 
310   unsigned OutReg = MI.getOperand(0).getReg();
311   unsigned InReg = MI.getOperand(1).getReg();
312 
313   unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
314   unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
315   unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
316   unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
317   unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
318   unsigned Eqz = WebAssembly::EQZ_I32;
319   unsigned And = WebAssembly::AND_I32;
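  // For an N-bit result, inputs with fabs(x) >= 2^(N-1) (signed) or with
  // x >= 2^N or x < 0 (unsigned) are out of range. CmpVal is that bound, and
  // Substitute is the result used when the input is out of range (or NaN).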
320   int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
321   int64_t Substitute = IsUnsigned ? 0 : Limit;
322   double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
323   auto &Context = BB->getParent()->getFunction().getContext();
324   Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);
325 
326   const BasicBlock *LLVM_BB = BB->getBasicBlock();
327   MachineFunction *F = BB->getParent();
328   MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVM_BB);
329   MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
330   MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVM_BB);
331 
332   MachineFunction::iterator It = ++BB->getIterator();
333   F->insert(It, FalseMBB);
334   F->insert(It, TrueMBB);
335   F->insert(It, DoneMBB);
336 
337   // Transfer the remainder of BB and its successor edges to DoneMBB.
338   DoneMBB->splice(DoneMBB->begin(), BB,
339                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
340   DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
341 
342   BB->addSuccessor(TrueMBB);
343   BB->addSuccessor(FalseMBB);
344   TrueMBB->addSuccessor(DoneMBB);
345   FalseMBB->addSuccessor(DoneMBB);
346 
347   unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
348   Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
349   Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
350   CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
351   EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
352   FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
353   TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
354 
355   MI.eraseFromParent();
356   // For signed numbers, we can do a single comparison to determine whether
357   // fabs(x) is within range.
358   if (IsUnsigned) {
359     Tmp0 = InReg;
360   } else {
361     BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
362   }
363   BuildMI(BB, DL, TII.get(FConst), Tmp1)
364       .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
365   BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);
366 
367   // For unsigned numbers, we have to do a separate comparison with zero.
368   if (IsUnsigned) {
369     Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
370     unsigned SecondCmpReg =
371         MRI.createVirtualRegister(&WebAssembly::I32RegClass);
372     unsigned AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
373     BuildMI(BB, DL, TII.get(FConst), Tmp1)
374         .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
375     BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
376     BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
377     CmpReg = AndReg;
378   }
379 
380   BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);
381 
382   // Create the CFG diamond to select between doing the conversion or using
383   // the substitute value.
384   BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
385   BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
386   BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
387   BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
388   BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
389       .addReg(FalseReg)
390       .addMBB(FalseMBB)
391       .addReg(TrueReg)
392       .addMBB(TrueMBB);
393 
394   return DoneMBB;
395 }
396 
397 MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
398     MachineInstr &MI, MachineBasicBlock *BB) const {
399   const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
400   DebugLoc DL = MI.getDebugLoc();
401 
402   switch (MI.getOpcode()) {
403   default:
404     llvm_unreachable("Unexpected instr type to insert");
405   case WebAssembly::FP_TO_SINT_I32_F32:
406     return LowerFPToInt(MI, DL, BB, TII, false, false, false,
407                         WebAssembly::I32_TRUNC_S_F32);
408   case WebAssembly::FP_TO_UINT_I32_F32:
409     return LowerFPToInt(MI, DL, BB, TII, true, false, false,
410                         WebAssembly::I32_TRUNC_U_F32);
411   case WebAssembly::FP_TO_SINT_I64_F32:
412     return LowerFPToInt(MI, DL, BB, TII, false, true, false,
413                         WebAssembly::I64_TRUNC_S_F32);
414   case WebAssembly::FP_TO_UINT_I64_F32:
415     return LowerFPToInt(MI, DL, BB, TII, true, true, false,
416                         WebAssembly::I64_TRUNC_U_F32);
417   case WebAssembly::FP_TO_SINT_I32_F64:
418     return LowerFPToInt(MI, DL, BB, TII, false, false, true,
419                         WebAssembly::I32_TRUNC_S_F64);
420   case WebAssembly::FP_TO_UINT_I32_F64:
421     return LowerFPToInt(MI, DL, BB, TII, true, false, true,
422                         WebAssembly::I32_TRUNC_U_F64);
423   case WebAssembly::FP_TO_SINT_I64_F64:
424     return LowerFPToInt(MI, DL, BB, TII, false, true, true,
425                         WebAssembly::I64_TRUNC_S_F64);
426   case WebAssembly::FP_TO_UINT_I64_F64:
427     return LowerFPToInt(MI, DL, BB, TII, true, true, true,
428                         WebAssembly::I64_TRUNC_U_F64);
430   }
431 }
432 
433 const char *
434 WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const {
435   switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) {
436   case WebAssemblyISD::FIRST_NUMBER:
437     break;
438 #define HANDLE_NODETYPE(NODE)                                                  \
439   case WebAssemblyISD::NODE:                                                   \
440     return "WebAssemblyISD::" #NODE;
441 #include "WebAssemblyISD.def"
442 #undef HANDLE_NODETYPE
443   }
444   return nullptr;
445 }
446 
447 std::pair<unsigned, const TargetRegisterClass *>
448 WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
449     const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
450   // First, see if this is a constraint that directly corresponds to a
451   // WebAssembly register class.
452   if (Constraint.size() == 1) {
453     switch (Constraint[0]) {
454     case 'r':
455       assert(VT != MVT::iPTR && "Pointer MVT not expected here");
456       if (Subtarget->hasSIMD128() && VT.isVector()) {
457         if (VT.getSizeInBits() == 128)
458           return std::make_pair(0U, &WebAssembly::V128RegClass);
459       }
460       if (VT.isInteger() && !VT.isVector()) {
461         if (VT.getSizeInBits() <= 32)
462           return std::make_pair(0U, &WebAssembly::I32RegClass);
463         if (VT.getSizeInBits() <= 64)
464           return std::make_pair(0U, &WebAssembly::I64RegClass);
465       }
466       break;
467     default:
468       break;
469     }
470   }
471 
472   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
473 }
474 
475 bool WebAssemblyTargetLowering::isCheapToSpeculateCttz() const {
476   // Assume ctz is a relatively cheap operation.
477   return true;
478 }
479 
480 bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const {
481   // Assume clz is a relatively cheap operation.
482   return true;
483 }
484 
485 bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
486                                                       const AddrMode &AM,
487                                                       Type *Ty, unsigned AS,
488                                                       Instruction *I) const {
  // WebAssembly offsets are added as unsigned without wrapping. The
  // isLegalAddressingMode interface gives us no way to determine whether
  // wrapping could occur, so we approximate this by accepting only
  // non-negative offsets.
492   if (AM.BaseOffs < 0)
493     return false;
494 
495   // WebAssembly has no scale register operands.
496   if (AM.Scale != 0)
497     return false;
498 
499   // Everything else is legal.
500   return true;
501 }
502 
503 bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
504     EVT /*VT*/, unsigned /*AddrSpace*/, unsigned /*Align*/, bool *Fast) const {
  // WebAssembly supports unaligned accesses, though loads and stores that use
  // them should declare it with the p2align attribute, and there may be a
  // performance impact. We tell LLVM they're "fast" because
508   // for the kinds of things that LLVM uses this for (merging adjacent stores
509   // of constants, etc.), WebAssembly implementations will either want the
510   // unaligned access or they'll split anyway.
511   if (Fast)
512     *Fast = true;
513   return true;
514 }
515 
516 bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
517                                               AttributeList Attr) const {
518   // The current thinking is that wasm engines will perform this optimization,
519   // so we can save on code size.
520   return true;
521 }
522 
523 EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
524                                                   LLVMContext &C,
525                                                   EVT VT) const {
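  // Vector comparisons produce a mask vector with the same number of lanes,
  // using integer elements of the same width as the operands.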
526   if (VT.isVector())
527     return VT.changeVectorElementTypeToInteger();
528 
529   return TargetLowering::getSetCCResultType(DL, C, VT);
530 }
531 
532 bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
533                                                    const CallInst &I,
534                                                    MachineFunction &MF,
535                                                    unsigned Intrinsic) const {
536   switch (Intrinsic) {
537   case Intrinsic::wasm_atomic_notify:
538     Info.opc = ISD::INTRINSIC_W_CHAIN;
539     Info.memVT = MVT::i32;
540     Info.ptrVal = I.getArgOperand(0);
541     Info.offset = 0;
542     Info.align = 4;
    // The atomic.notify instruction does not really load the memory specified
    // by this argument, but a MachineMemOperand must be either a load or a
    // store, so we mark this as a load.
546     // FIXME Volatile isn't really correct, but currently all LLVM atomic
547     // instructions are treated as volatiles in the backend, so we should be
548     // consistent. The same applies for wasm_atomic_wait intrinsics too.
549     Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
550     return true;
551   case Intrinsic::wasm_atomic_wait_i32:
552     Info.opc = ISD::INTRINSIC_W_CHAIN;
553     Info.memVT = MVT::i32;
554     Info.ptrVal = I.getArgOperand(0);
555     Info.offset = 0;
556     Info.align = 4;
557     Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
558     return true;
559   case Intrinsic::wasm_atomic_wait_i64:
560     Info.opc = ISD::INTRINSIC_W_CHAIN;
561     Info.memVT = MVT::i64;
562     Info.ptrVal = I.getArgOperand(0);
563     Info.offset = 0;
564     Info.align = 8;
565     Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
566     return true;
567   default:
568     return false;
569   }
570 }
571 
572 //===----------------------------------------------------------------------===//
573 // WebAssembly Lowering private implementation.
574 //===----------------------------------------------------------------------===//
575 
576 //===----------------------------------------------------------------------===//
577 // Lowering Code
578 //===----------------------------------------------------------------------===//
579 
580 static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *msg) {
581   MachineFunction &MF = DAG.getMachineFunction();
582   DAG.getContext()->diagnose(
583       DiagnosticInfoUnsupported(MF.getFunction(), msg, DL.getDebugLoc()));
584 }
585 
586 // Test whether the given calling convention is supported.
587 static bool CallingConvSupported(CallingConv::ID CallConv) {
  // We currently support the language-independent, target-independent
  // conventions. We don't yet have a way to annotate calls with properties
  // like "cold", and we don't have any call-clobbered registers, so these are
  // mostly all handled the same way.
592   return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
593          CallConv == CallingConv::Cold ||
594          CallConv == CallingConv::PreserveMost ||
595          CallConv == CallingConv::PreserveAll ||
596          CallConv == CallingConv::CXX_FAST_TLS;
597 }
598 
599 SDValue
600 WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
601                                      SmallVectorImpl<SDValue> &InVals) const {
602   SelectionDAG &DAG = CLI.DAG;
603   SDLoc DL = CLI.DL;
604   SDValue Chain = CLI.Chain;
605   SDValue Callee = CLI.Callee;
606   MachineFunction &MF = DAG.getMachineFunction();
607   auto Layout = MF.getDataLayout();
608 
609   CallingConv::ID CallConv = CLI.CallConv;
610   if (!CallingConvSupported(CallConv))
611     fail(DL, DAG,
612          "WebAssembly doesn't support language-specific or target-specific "
613          "calling conventions yet");
614   if (CLI.IsPatchPoint)
615     fail(DL, DAG, "WebAssembly doesn't support patch point yet");
616 
617   // WebAssembly doesn't currently support explicit tail calls. If they are
618   // required, fail. Otherwise, just disable them.
619   if ((CallConv == CallingConv::Fast && CLI.IsTailCall &&
620        MF.getTarget().Options.GuaranteedTailCallOpt) ||
621       (CLI.CS && CLI.CS.isMustTailCall()))
622     fail(DL, DAG, "WebAssembly doesn't support tail call yet");
623   CLI.IsTailCall = false;
624 
625   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
626   if (Ins.size() > 1)
627     fail(DL, DAG, "WebAssembly doesn't support more than 1 returned value yet");
628 
629   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
630   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
631   unsigned NumFixedArgs = 0;
632   for (unsigned i = 0; i < Outs.size(); ++i) {
633     const ISD::OutputArg &Out = Outs[i];
634     SDValue &OutVal = OutVals[i];
635     if (Out.Flags.isNest())
636       fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
637     if (Out.Flags.isInAlloca())
638       fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
639     if (Out.Flags.isInConsecutiveRegs())
640       fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
641     if (Out.Flags.isInConsecutiveRegsLast())
642       fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
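    // Lower byval arguments by making a copy in a caller-allocated stack
    // object and passing a pointer to the copy instead.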
643     if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
644       auto &MFI = MF.getFrameInfo();
645       int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
646                                      Out.Flags.getByValAlign(),
647                                      /*isSS=*/false);
648       SDValue SizeNode =
649           DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
650       SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
651       Chain = DAG.getMemcpy(
652           Chain, DL, FINode, OutVal, SizeNode, Out.Flags.getByValAlign(),
653           /*isVolatile*/ false, /*AlwaysInline=*/false,
654           /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
655       OutVal = FINode;
656     }
657     // Count the number of fixed args *after* legalization.
658     NumFixedArgs += Out.IsFixed;
659   }
660 
661   bool IsVarArg = CLI.IsVarArg;
662   auto PtrVT = getPointerTy(Layout);
663 
664   // Analyze operands of the call, assigning locations to each operand.
665   SmallVector<CCValAssign, 16> ArgLocs;
666   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
667 
668   if (IsVarArg) {
669     // Outgoing non-fixed arguments are placed in a buffer. First
670     // compute their offsets and the total amount of buffer space needed.
671     for (SDValue Arg :
672          make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) {
673       EVT VT = Arg.getValueType();
674       assert(VT != MVT::iPTR && "Legalized args should be concrete");
675       Type *Ty = VT.getTypeForEVT(*DAG.getContext());
676       unsigned Offset = CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty),
677                                              Layout.getABITypeAlignment(Ty));
678       CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
679                                         Offset, VT.getSimpleVT(),
680                                         CCValAssign::Full));
681     }
682   }
683 
684   unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
685 
686   SDValue FINode;
687   if (IsVarArg && NumBytes) {
    // For non-fixed arguments, emit stores of the argument values into the
    // stack buffer at the offsets computed above.
690     int FI = MF.getFrameInfo().CreateStackObject(NumBytes,
691                                                  Layout.getStackAlignment(),
692                                                  /*isSS=*/false);
693     unsigned ValNo = 0;
694     SmallVector<SDValue, 8> Chains;
695     for (SDValue Arg :
696          make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) {
697       assert(ArgLocs[ValNo].getValNo() == ValNo &&
698              "ArgLocs should remain in order and only hold varargs args");
699       unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
700       FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
701       SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
702                                 DAG.getConstant(Offset, DL, PtrVT));
703       Chains.push_back(
704           DAG.getStore(Chain, DL, Arg, Add,
705                        MachinePointerInfo::getFixedStack(MF, FI, Offset), 0));
706     }
707     if (!Chains.empty())
708       Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
709   } else if (IsVarArg) {
710     FINode = DAG.getIntPtrConstant(0, DL);
711   }
712 
713   // Compute the operands for the CALLn node.
714   SmallVector<SDValue, 16> Ops;
715   Ops.push_back(Chain);
716   Ops.push_back(Callee);
717 
718   // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
719   // isn't reliable.
720   Ops.append(OutVals.begin(),
721              IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
722   // Add a pointer to the vararg buffer.
723   if (IsVarArg)
724     Ops.push_back(FINode);
725 
726   SmallVector<EVT, 8> InTys;
727   for (const auto &In : Ins) {
728     assert(!In.Flags.isByVal() && "byval is not valid for return values");
729     assert(!In.Flags.isNest() && "nest is not valid for return values");
730     if (In.Flags.isInAlloca())
731       fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
732     if (In.Flags.isInConsecutiveRegs())
733       fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
734     if (In.Flags.isInConsecutiveRegsLast())
735       fail(DL, DAG,
736            "WebAssembly hasn't implemented cons regs last return values");
737     // Ignore In.getOrigAlign() because all our arguments are passed in
738     // registers.
739     InTys.push_back(In.VT);
740   }
741   InTys.push_back(MVT::Other);
742   SDVTList InTyList = DAG.getVTList(InTys);
743   SDValue Res =
744       DAG.getNode(Ins.empty() ? WebAssemblyISD::CALL0 : WebAssemblyISD::CALL1,
745                   DL, InTyList, Ops);
746   if (Ins.empty()) {
747     Chain = Res;
748   } else {
749     InVals.push_back(Res);
750     Chain = Res.getValue(1);
751   }
752 
753   return Chain;
754 }
755 
756 bool WebAssemblyTargetLowering::CanLowerReturn(
757     CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
758     const SmallVectorImpl<ISD::OutputArg> &Outs,
759     LLVMContext & /*Context*/) const {
760   // WebAssembly can't currently handle returning tuples.
761   return Outs.size() <= 1;
762 }
763 
764 SDValue WebAssemblyTargetLowering::LowerReturn(
765     SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
766     const SmallVectorImpl<ISD::OutputArg> &Outs,
767     const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
768     SelectionDAG &DAG) const {
769   assert(Outs.size() <= 1 && "WebAssembly can only return up to one value");
770   if (!CallingConvSupported(CallConv))
771     fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
772 
773   SmallVector<SDValue, 4> RetOps(1, Chain);
774   RetOps.append(OutVals.begin(), OutVals.end());
775   Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);
776 
777   // Record the number and types of the return values.
778   for (const ISD::OutputArg &Out : Outs) {
779     assert(!Out.Flags.isByVal() && "byval is not valid for return values");
780     assert(!Out.Flags.isNest() && "nest is not valid for return values");
781     assert(Out.IsFixed && "non-fixed return value is not valid");
782     if (Out.Flags.isInAlloca())
783       fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
784     if (Out.Flags.isInConsecutiveRegs())
785       fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
786     if (Out.Flags.isInConsecutiveRegsLast())
787       fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
788   }
789 
790   return Chain;
791 }
792 
793 SDValue WebAssemblyTargetLowering::LowerFormalArguments(
794     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
795     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
796     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
797   if (!CallingConvSupported(CallConv))
798     fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
799 
800   MachineFunction &MF = DAG.getMachineFunction();
801   auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();
802 
803   // Set up the incoming ARGUMENTS value, which serves to represent the liveness
804   // of the incoming values before they're represented by virtual registers.
805   MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);
806 
807   for (const ISD::InputArg &In : Ins) {
808     if (In.Flags.isInAlloca())
809       fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
810     if (In.Flags.isNest())
811       fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
812     if (In.Flags.isInConsecutiveRegs())
813       fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
814     if (In.Flags.isInConsecutiveRegsLast())
815       fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
816     // Ignore In.getOrigAlign() because all our arguments are passed in
817     // registers.
818     InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
819                                            DAG.getTargetConstant(InVals.size(),
820                                                                  DL, MVT::i32))
821                              : DAG.getUNDEF(In.VT));
822 
823     // Record the number and types of arguments.
824     MFI->addParam(In.VT);
825   }
826 
827   // Varargs are copied into a buffer allocated by the caller, and a pointer to
828   // the buffer is passed as an argument.
829   if (IsVarArg) {
830     MVT PtrVT = getPointerTy(MF.getDataLayout());
831     unsigned VarargVreg =
832         MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrVT));
833     MFI->setVarargBufferVreg(VarargVreg);
834     Chain = DAG.getCopyToReg(
835         Chain, DL, VarargVreg,
836         DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
837                     DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
838     MFI->addParam(PtrVT);
839   }
840 
841   // Record the number and types of arguments and results.
842   SmallVector<MVT, 4> Params;
843   SmallVector<MVT, 4> Results;
844   ComputeSignatureVTs(MF.getFunction().getFunctionType(), MF.getFunction(),
845                       DAG.getTarget(), Params, Results);
846   for (MVT VT : Results)
847     MFI->addResult(VT);
848   // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
849   // the param logic here with ComputeSignatureVTs
850   assert(MFI->getParams().size() == Params.size() &&
851          std::equal(MFI->getParams().begin(), MFI->getParams().end(),
852                     Params.begin()));
853 
854   return Chain;
855 }
856 
857 //===----------------------------------------------------------------------===//
858 //  Custom lowering hooks.
859 //===----------------------------------------------------------------------===//
860 
861 SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
862                                                   SelectionDAG &DAG) const {
863   SDLoc DL(Op);
864   switch (Op.getOpcode()) {
865   default:
866     llvm_unreachable("unimplemented operation lowering");
867     return SDValue();
868   case ISD::FrameIndex:
869     return LowerFrameIndex(Op, DAG);
870   case ISD::GlobalAddress:
871     return LowerGlobalAddress(Op, DAG);
872   case ISD::ExternalSymbol:
873     return LowerExternalSymbol(Op, DAG);
874   case ISD::JumpTable:
875     return LowerJumpTable(Op, DAG);
876   case ISD::BR_JT:
877     return LowerBR_JT(Op, DAG);
878   case ISD::VASTART:
879     return LowerVASTART(Op, DAG);
880   case ISD::BlockAddress:
881   case ISD::BRIND:
882     fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
883     return SDValue();
884   case ISD::RETURNADDR: // Probably nothing meaningful can be returned here.
885     fail(DL, DAG, "WebAssembly hasn't implemented __builtin_return_address");
886     return SDValue();
887   case ISD::FRAMEADDR:
888     return LowerFRAMEADDR(Op, DAG);
889   case ISD::CopyToReg:
890     return LowerCopyToReg(Op, DAG);
891   case ISD::INTRINSIC_WO_CHAIN:
892     return LowerINTRINSIC_WO_CHAIN(Op, DAG);
893   case ISD::EXTRACT_VECTOR_ELT:
894   case ISD::INSERT_VECTOR_ELT:
895     return LowerAccessVectorElement(Op, DAG);
896   case ISD::INTRINSIC_VOID:
897     return LowerINTRINSIC_VOID(Op, DAG);
898   case ISD::SIGN_EXTEND_INREG:
899     return LowerSIGN_EXTEND_INREG(Op, DAG);
900   case ISD::VECTOR_SHUFFLE:
901     return LowerVECTOR_SHUFFLE(Op, DAG);
902   case ISD::SHL:
903   case ISD::SRA:
904   case ISD::SRL:
905     return LowerShift(Op, DAG);
906   }
907 }
908 
909 SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
910                                                   SelectionDAG &DAG) const {
911   SDValue Src = Op.getOperand(2);
912   if (isa<FrameIndexSDNode>(Src.getNode())) {
913     // CopyToReg nodes don't support FrameIndex operands. Other targets select
914     // the FI to some LEA-like instruction, but since we don't have that, we
915     // need to insert some kind of instruction that can take an FI operand and
916     // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
917     // local.copy between Op and its FI operand.
918     SDValue Chain = Op.getOperand(0);
919     SDLoc DL(Op);
920     unsigned Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
921     EVT VT = Src.getValueType();
922     SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
923                                                    : WebAssembly::COPY_I64,
924                                     DL, VT, Src),
925                  0);
926     return Op.getNode()->getNumValues() == 1
927                ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
928                : DAG.getCopyToReg(Chain, DL, Reg, Copy,
929                                   Op.getNumOperands() == 4 ? Op.getOperand(3)
930                                                            : SDValue());
931   }
932   return SDValue();
933 }
934 
935 SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
936                                                    SelectionDAG &DAG) const {
937   int FI = cast<FrameIndexSDNode>(Op)->getIndex();
938   return DAG.getTargetFrameIndex(FI, Op.getValueType());
939 }
940 
941 SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
942                                                   SelectionDAG &DAG) const {
943   // Non-zero depths are not supported by WebAssembly currently. Use the
944   // legalizer's default expansion, which is to return 0 (what this function is
945   // documented to do).
946   if (Op.getConstantOperandVal(0) > 0)
947     return SDValue();
948 
949   DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
950   EVT VT = Op.getValueType();
951   unsigned FP =
952       Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
953   return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
954 }
955 
956 SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
957                                                       SelectionDAG &DAG) const {
958   SDLoc DL(Op);
959   const auto *GA = cast<GlobalAddressSDNode>(Op);
960   EVT VT = Op.getValueType();
961   assert(GA->getTargetFlags() == 0 &&
962          "Unexpected target flags on generic GlobalAddressSDNode");
963   if (GA->getAddressSpace() != 0)
964     fail(DL, DAG, "WebAssembly only expects the 0 address space");
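  // Wrap the target global address in a Wrapper node so instruction selection
  // can match it.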
965   return DAG.getNode(
966       WebAssemblyISD::Wrapper, DL, VT,
967       DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset()));
968 }
969 
970 SDValue
971 WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
972                                                SelectionDAG &DAG) const {
973   SDLoc DL(Op);
974   const auto *ES = cast<ExternalSymbolSDNode>(Op);
975   EVT VT = Op.getValueType();
976   assert(ES->getTargetFlags() == 0 &&
977          "Unexpected target flags on generic ExternalSymbolSDNode");
  // Mark the symbol with MO_SYMBOL_FUNCTION, which indicates that this is a
  // "function" symbol rather than a data symbol. We do this unconditionally
  // even though we don't know anything about the symbol other than its name,
  // because all external symbols used in target-independent SelectionDAG code
  // are for functions.
983   return DAG.getNode(
984       WebAssemblyISD::Wrapper, DL, VT,
985       DAG.getTargetExternalSymbol(ES->getSymbol(), VT,
986                                   WebAssemblyII::MO_SYMBOL_FUNCTION));
987 }
988 
989 SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
990                                                   SelectionDAG &DAG) const {
991   // There's no need for a Wrapper node because we always incorporate a jump
992   // table operand into a BR_TABLE instruction, rather than ever
993   // materializing it in a register.
994   const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
995   return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
996                                 JT->getTargetFlags());
997 }
998 
999 SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
1000                                               SelectionDAG &DAG) const {
1001   SDLoc DL(Op);
1002   SDValue Chain = Op.getOperand(0);
1003   const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
1004   SDValue Index = Op.getOperand(2);
1005   assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
1006 
1007   SmallVector<SDValue, 8> Ops;
1008   Ops.push_back(Chain);
1009   Ops.push_back(Index);
1010 
1011   MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
1012   const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
1013 
1014   // Add an operand for each case.
1015   for (auto MBB : MBBs)
1016     Ops.push_back(DAG.getBasicBlock(MBB));
1017 
  // TODO: For now, we just pick something arbitrary for a default case. We
  // really want to sniff out the guard and put in the real default case (and
  // delete the guard).
1021   Ops.push_back(DAG.getBasicBlock(MBBs[0]));
1022 
1023   return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
1024 }
1025 
1026 SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
1027                                                 SelectionDAG &DAG) const {
1028   SDLoc DL(Op);
1029   EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());
1030 
1031   auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
1032   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1033 
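  // Store the address of the vararg buffer, which was set up in
  // LowerFormalArguments, into the va_list object pointed to by the operand.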
1034   SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
1035                                     MFI->getVarargBufferVreg(), PtrVT);
1036   return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
1037                       MachinePointerInfo(SV), 0);
1038 }
1039 
1040 SDValue
1041 WebAssemblyTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
1042                                                    SelectionDAG &DAG) const {
1043   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1044   SDLoc DL(Op);
1045   switch (IntNo) {
1046   default:
1047     return {}; // Don't custom lower most intrinsics.
1048 
1049   case Intrinsic::wasm_lsda: {
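    // Return the address of this function's LSDA via its per-function
    // GCC_except_table symbol.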
1050     MachineFunction &MF = DAG.getMachineFunction();
1051     EVT VT = Op.getValueType();
1052     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1053     MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
1054     auto &Context = MF.getMMI().getContext();
1055     MCSymbol *S = Context.getOrCreateSymbol(Twine("GCC_except_table") +
1056                                             Twine(MF.getFunctionNumber()));
1057     return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
1058                        DAG.getMCSymbol(S, PtrVT));
1059   }
1060   }
1061 }
1062 
1063 SDValue
1064 WebAssemblyTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
1065                                                SelectionDAG &DAG) const {
1066   MachineFunction &MF = DAG.getMachineFunction();
1067   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1068   SDLoc DL(Op);
1069 
1070   switch (IntNo) {
1071   default:
1072     return {}; // Don't custom lower most intrinsics.
1073 
1074   case Intrinsic::wasm_throw: {
1075     int Tag = cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
1076     switch (Tag) {
1077     case CPP_EXCEPTION: {
1078       const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1079       MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
1080       const char *SymName = MF.createExternalSymbolName("__cpp_exception");
1081       SDValue SymNode =
1082           DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
1083                       DAG.getTargetExternalSymbol(
1084                           SymName, PtrVT, WebAssemblyII::MO_SYMBOL_EVENT));
1085       return DAG.getNode(WebAssemblyISD::THROW, DL,
1086                          MVT::Other, // outchain type
1087                          {
1088                              Op.getOperand(0), // inchain
1089                              SymNode,          // exception symbol
1090                              Op.getOperand(3)  // thrown value
1091                          });
1092     }
1093     default:
1094       llvm_unreachable("Invalid tag!");
1095     }
1096     break;
1097   }
1098   }
1099 }
1100 
1101 SDValue
1102 WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
1103                                                   SelectionDAG &DAG) const {
  // If sign extension operations are disabled, allow sext_inreg only if the
  // operand is a vector extract. SIMD does not depend on sign extension
  // operations, but
1106   // allowing sext_inreg in this context lets us have simple patterns to select
1107   // extract_lane_s instructions. Expanding sext_inreg everywhere would be
1108   // simpler in this file, but would necessitate large and brittle patterns to
1109   // undo the expansion and select extract_lane_s instructions.
1110   assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
1111   if (Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT)
1112     return Op;
1113   // Otherwise expand
1114   return SDValue();
1115 }
1116 
1117 SDValue
1118 WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
1119                                                SelectionDAG &DAG) const {
1120   SDLoc DL(Op);
1121   ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
1122   MVT VecType = Op.getOperand(0).getSimpleValueType();
1123   assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
1124   size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;
1125 
1126   // Space for two vector args and sixteen mask indices
1127   SDValue Ops[18];
1128   size_t OpIdx = 0;
1129   Ops[OpIdx++] = Op.getOperand(0);
1130   Ops[OpIdx++] = Op.getOperand(1);
1131 
1132   // Expand mask indices to byte indices and materialize them as operands
1133   for (size_t I = 0, Lanes = Mask.size(); I < Lanes; ++I) {
1134     for (size_t J = 0; J < LaneBytes; ++J) {
1135       // Lower undefs (represented by -1 in mask) to zero
1136       uint64_t ByteIndex =
1137           Mask[I] == -1 ? 0 : (uint64_t)Mask[I] * LaneBytes + J;
1138       Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
1139     }
1140   }
1141 
1142   return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
1143 }
1144 
1145 SDValue
1146 WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
1147                                                     SelectionDAG &DAG) const {
1148   // Allow constant lane indices, expand variable lane indices
1149   SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
1150   if (isa<ConstantSDNode>(IdxNode) || IdxNode->isUndef())
1151     return Op;
1152   else
1153     // Perform default expansion
1154     return SDValue();
1155 }
1156 
1157 static SDValue UnrollVectorShift(SDValue Op, SelectionDAG &DAG) {
1158   EVT LaneT = Op.getSimpleValueType().getVectorElementType();
1159   // 32-bit and 64-bit unrolled shifts will have proper semantics
1160   if (LaneT.bitsGE(MVT::i32))
1161     return DAG.UnrollVectorOp(Op.getNode());
1162   // Otherwise mask the shift value to get proper semantics from 32-bit shift
1163   SDLoc DL(Op);
1164   SDValue ShiftVal = Op.getOperand(1);
1165   uint64_t MaskVal = LaneT.getSizeInBits() - 1;
1166   SDValue MaskedShiftVal = DAG.getNode(
1167       ISD::AND,                    // mask opcode
1168       DL, ShiftVal.getValueType(), // masked value type
1169       ShiftVal,                    // original shift value operand
1170       DAG.getConstant(MaskVal, DL, ShiftVal.getValueType()) // mask operand
1171   );
1172 
1173   return DAG.UnrollVectorOp(
1174       DAG.getNode(Op.getOpcode(),        // original shift opcode
1175                   DL, Op.getValueType(), // original return type
1176                   Op.getOperand(0),      // original vector operand,
1177                   MaskedShiftVal         // new masked shift value operand
1178                   )
1179           .getNode());
1180 }
1181 
1182 SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
1183                                               SelectionDAG &DAG) const {
1184   SDLoc DL(Op);
1185 
1186   // Only manually lower vector shifts
1187   assert(Op.getSimpleValueType().isVector());
1188 
1189   // Expand all vector shifts until V8 fixes its implementation
1190   // TODO: remove this once V8 is fixed
1191   if (!Subtarget->hasUnimplementedSIMD128())
1192     return UnrollVectorShift(Op, DAG);
1193 
1194   // Unroll non-splat vector shifts
1195   BuildVectorSDNode *ShiftVec;
1196   SDValue SplatVal;
1197   if (!(ShiftVec = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode())) ||
1198       !(SplatVal = ShiftVec->getSplatValue()))
1199     return UnrollVectorShift(Op, DAG);
1200 
1201   // All splats except i64x2 const splats are handled by patterns
1202   ConstantSDNode *SplatConst = dyn_cast<ConstantSDNode>(SplatVal);
1203   if (!SplatConst || Op.getSimpleValueType() != MVT::v2i64)
1204     return Op;
1205 
1206   // i64x2 const splats are custom lowered to avoid unnecessary wraps
1207   unsigned Opcode;
1208   switch (Op.getOpcode()) {
1209   case ISD::SHL:
1210     Opcode = WebAssemblyISD::VEC_SHL;
1211     break;
1212   case ISD::SRA:
1213     Opcode = WebAssemblyISD::VEC_SHR_S;
1214     break;
1215   case ISD::SRL:
1216     Opcode = WebAssemblyISD::VEC_SHR_U;
1217     break;
1218   default:
1219     llvm_unreachable("unexpected opcode");
1220   }
1221   APInt Shift = SplatConst->getAPIntValue().zextOrTrunc(32);
1222   return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0),
1223                      DAG.getConstant(Shift, DL, MVT::i32));
1224 }
1225 
1226 //===----------------------------------------------------------------------===//
1227 //                          WebAssembly Optimization Hooks
1228 //===----------------------------------------------------------------------===//
1229