1 //=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// This file implements the WebAssemblyTargetLowering class.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "WebAssemblyISelLowering.h"
16 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
17 #include "WebAssemblyMachineFunctionInfo.h"
18 #include "WebAssemblySubtarget.h"
19 #include "WebAssemblyTargetMachine.h"
20 #include "llvm/CodeGen/Analysis.h"
21 #include "llvm/CodeGen/CallingConvLower.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineJumpTableInfo.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/CodeGen/SelectionDAG.h"
26 #include "llvm/IR/DiagnosticInfo.h"
27 #include "llvm/IR/DiagnosticPrinter.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/IR/Intrinsics.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/Support/raw_ostream.h"
33 #include "llvm/Target/TargetOptions.h"
34 using namespace llvm;
35 
36 #define DEBUG_TYPE "wasm-lower"
37 
// Emit proposed instructions that may not have been implemented in engines
// yet. When set, the constructor below additionally registers the v2i64 and
// v2f64 SIMD register classes. Off by default.
cl::opt<bool> EnableUnimplementedWasmSIMDInstrs(
    "wasm-enable-unimplemented-simd",
    cl::desc("Emit potentially-unimplemented WebAssembly SIMD instructions"),
    cl::init(false));
43 
/// Construct the WebAssembly lowering: register the legal register classes
/// and declare, per operation and type, whether generic DAG nodes are Legal,
/// must be Expanded, or get Custom lowering in this file.
WebAssemblyTargetLowering::WebAssemblyTargetLowering(
    const TargetMachine &TM, const WebAssemblySubtarget &STI)
    : TargetLowering(TM), Subtarget(&STI) {
  // Pointer-sized integer type: i64 on wasm64, i32 on wasm32.
  auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;

  // Booleans always contain 0 or 1.
  setBooleanContents(ZeroOrOneBooleanContent);
  // WebAssembly does not produce floating-point exceptions on normal floating
  // point operations.
  setHasFloatingPointExceptions(false);
  // We don't know the microarchitecture here, so just reduce register pressure.
  setSchedulingPreference(Sched::RegPressure);
  // Tell ISel that we have a stack pointer.
  setStackPointerRegisterToSaveRestore(
      Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
  // Set up the register classes.
  addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
  addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
  addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
  addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
  if (Subtarget->hasSIMD128()) {
    addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
    // 64-bit-lane vectors are gated behind the command-line flag above.
    if (EnableUnimplementedWasmSIMDInstrs) {
      addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
      addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
    }
  }
  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget->getRegisterInfo());

  // Addresses and indirect branches get custom lowering (see the
  // LowerOperation helpers elsewhere in this file).
  setOperationAction(ISD::GlobalAddress, MVTPtr, Custom);
  setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom);
  setOperationAction(ISD::JumpTable, MVTPtr, Custom);
  setOperationAction(ISD::BlockAddress, MVTPtr, Custom);
  setOperationAction(ISD::BRIND, MVT::Other, Custom);

  // Take the default expansion for va_arg, va_copy, and va_end. There is no
  // default action for va_start, so we do that custom.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  for (auto T : {MVT::f32, MVT::f64}) {
    // Don't expand the floating-point types to constant pools.
    setOperationAction(ISD::ConstantFP, T, Legal);
    // Expand floating-point comparisons.
    for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
                    ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
      setCondCodeAction(CC, T, Expand);
    // Expand floating-point library function operators.
    for (auto Op : {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM,
                    ISD::FMA})
      setOperationAction(Op, T, Expand);
    // Note supported floating-point library function operators that otherwise
    // default to expand.
    for (auto Op :
         {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT})
      setOperationAction(Op, T, Legal);
    // Support minnan and maxnan, which otherwise default to expand.
    setOperationAction(ISD::FMINNAN, T, Legal);
    setOperationAction(ISD::FMAXNAN, T, Legal);
    // WebAssembly currently has no builtin f16 support.
    setOperationAction(ISD::FP16_TO_FP, T, Expand);
    setOperationAction(ISD::FP_TO_FP16, T, Expand);
    setLoadExtAction(ISD::EXTLOAD, T, MVT::f16, Expand);
    setTruncStoreAction(T, MVT::f16, Expand);
  }

  for (auto T : {MVT::i32, MVT::i64}) {
    // Expand unavailable integer operations.
    for (auto Op :
         {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI,
          ISD::MULHS, ISD::MULHU, ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS,
          ISD::SRA_PARTS, ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC,
          ISD::SUBE}) {
      setOperationAction(Op, T, Expand);
    }
  }

  // As a special case, these operators use the type to mean the type to
  // sign-extend from.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  // Without the sign-ext feature, in-register sign extensions must be
  // expanded (shift-left then arithmetic-shift-right).
  if (!Subtarget->hasSignExt()) {
    for (auto T : {MVT::i8, MVT::i16, MVT::i32})
      setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand);
  }

  // Dynamic stack allocation: use the default expansion.
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand);

  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
  setOperationAction(ISD::CopyToReg, MVT::Other, Custom);

  // Expand these forms; we pattern-match the forms that we can handle in isel.
  for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
    for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
      setOperationAction(Op, T, Expand);

  // We have custom switch handling.
  setOperationAction(ISD::BR_JT, MVT::Other, Custom);

  // WebAssembly doesn't have:
  //  - Floating-point extending loads.
  //  - Floating-point truncating stores.
  //  - i1 extending loads.
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  for (auto T : MVT::integer_valuetypes())
    for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
      setLoadExtAction(Ext, T, MVT::i1, Promote);

  // Trap lowers to wasm unreachable
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // Exception handling intrinsics
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setMaxAtomicSizeInBitsSupported(64);
}
169 
170 TargetLowering::AtomicExpansionKind
171 WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
172   // We have wasm instructions for these
173   switch (AI->getOperation()) {
174   case AtomicRMWInst::Add:
175   case AtomicRMWInst::Sub:
176   case AtomicRMWInst::And:
177   case AtomicRMWInst::Or:
178   case AtomicRMWInst::Xor:
179   case AtomicRMWInst::Xchg:
180     return AtomicExpansionKind::None;
181   default:
182     break;
183   }
184   return AtomicExpansionKind::CmpXChg;
185 }
186 
/// Create the target-specific FastISel instance used for -O0 instruction
/// selection; simply forwards to the WebAssembly FastISel factory.
FastISel *WebAssemblyTargetLowering::createFastISel(
    FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
  return WebAssembly::createFastISel(FuncInfo, LibInfo);
}
191 
/// Report whether a constant offset can be folded into a global address
/// reference. WebAssembly places no restrictions here.
bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode * /*GA*/) const {
  // All offsets can be folded.
  return true;
}
197 
198 MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
199                                                       EVT VT) const {
200   unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
201   if (BitWidth > 1 && BitWidth < 8) BitWidth = 8;
202 
203   if (BitWidth > 64) {
204     // The shift will be lowered to a libcall, and compiler-rt libcalls expect
205     // the count to be an i32.
206     BitWidth = 32;
207     assert(BitWidth >= Log2_32_Ceil(VT.getSizeInBits()) &&
208            "32-bit shift counts ought to be enough for anyone");
209   }
210 
211   MVT Result = MVT::getIntegerVT(BitWidth);
212   assert(Result != MVT::INVALID_SIMPLE_VALUE_TYPE &&
213          "Unable to represent scalar shift amount type");
214   return Result;
215 }
216 
// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
// undefined result on invalid/overflow, to the WebAssembly opcode, which
// traps on invalid/overflow.
//
// This builds a CFG diamond around the conversion: the input is range-checked
// first, the in-range path executes the (trapping) wasm truncation, and the
// out-of-range path materializes a substitute constant; a PHI in the join
// block selects the result. Parameters select the flavor: IsUnsigned
// (unsigned vs signed result), Int64 (i64 vs i32 result), Float64 (f64 vs f32
// input), and LoweredOpcode (the concrete wasm truncation instruction).
// Returns the join block that now holds the remainder of BB.
static MachineBasicBlock *
LowerFPToInt(
    MachineInstr &MI,
    DebugLoc DL,
    MachineBasicBlock *BB,
    const TargetInstrInfo &TII,
    bool IsUnsigned,
    bool Int64,
    bool Float64,
    unsigned LoweredOpcode
) {
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();

  unsigned OutReg = MI.getOperand(0).getReg();
  unsigned InReg = MI.getOperand(1).getReg();

  // Pick the opcodes matching the input float width / output int width.
  unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
  unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
  unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
  unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
  unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
  unsigned Eqz = WebAssembly::EQZ_I32;
  unsigned And = WebAssembly::AND_I32;
  // Substitute is the value produced for out-of-range inputs; CmpVal is the
  // exclusive upper bound for |input| (signed) or input (unsigned).
  int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
  int64_t Substitute = IsUnsigned ? 0 : Limit;
  double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
  auto &Context = BB->getParent()->getFunction().getContext();
  Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  // TrueMBB produces the substitute; FalseMBB performs the real conversion;
  // DoneMBB joins them with a PHI.
  MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator It = ++BB->getIterator();
  F->insert(It, FalseMBB);
  F->insert(It, TrueMBB);
  F->insert(It, DoneMBB);

  // Transfer the remainder of BB and its successor edges to DoneMBB.
  DoneMBB->splice(DoneMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)),
                  BB->end());
  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);

  BB->addSuccessor(TrueMBB);
  BB->addSuccessor(FalseMBB);
  TrueMBB->addSuccessor(DoneMBB);
  FalseMBB->addSuccessor(DoneMBB);

  unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
  Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
  Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
  CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
  TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));

  MI.eraseFromParent();
  // For signed numbers, we can do a single comparison to determine whether
  // fabs(x) is within range.
  if (IsUnsigned) {
    Tmp0 = InReg;
  } else {
    BuildMI(BB, DL, TII.get(Abs), Tmp0)
        .addReg(InReg);
  }
  BuildMI(BB, DL, TII.get(FConst), Tmp1)
      .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
  BuildMI(BB, DL, TII.get(LT), CmpReg)
      .addReg(Tmp0)
      .addReg(Tmp1);

  // For unsigned numbers, we have to do a separate comparison with zero.
  // NaN fails both comparisons, so it also takes the substitute path.
  if (IsUnsigned) {
    Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
    unsigned SecondCmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
    unsigned AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
    BuildMI(BB, DL, TII.get(FConst), Tmp1)
        .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
    BuildMI(BB, DL, TII.get(GE), SecondCmpReg)
        .addReg(Tmp0)
        .addReg(Tmp1);
    BuildMI(BB, DL, TII.get(And), AndReg)
        .addReg(CmpReg)
        .addReg(SecondCmpReg);
    CmpReg = AndReg;
  }

  // EqzReg is nonzero when the input is OUT of range.
  BuildMI(BB, DL, TII.get(Eqz), EqzReg)
      .addReg(CmpReg);

  // Create the CFG diamond to select between doing the conversion or using
  // the substitute value.
  BuildMI(BB, DL, TII.get(WebAssembly::BR_IF))
      .addMBB(TrueMBB)
      .addReg(EqzReg);
  BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg)
      .addReg(InReg);
  BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR))
      .addMBB(DoneMBB);
  BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg)
      .addImm(Substitute);
  BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
      .addReg(FalseReg)
      .addMBB(FalseMBB)
      .addReg(TrueReg)
      .addMBB(TrueMBB);

  return DoneMBB;
}
332 
333 MachineBasicBlock *
334 WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
335     MachineInstr &MI,
336     MachineBasicBlock *BB
337 ) const {
338   const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
339   DebugLoc DL = MI.getDebugLoc();
340 
341   switch (MI.getOpcode()) {
342   default: llvm_unreachable("Unexpected instr type to insert");
343   case WebAssembly::FP_TO_SINT_I32_F32:
344     return LowerFPToInt(MI, DL, BB, TII, false, false, false,
345                         WebAssembly::I32_TRUNC_S_F32);
346   case WebAssembly::FP_TO_UINT_I32_F32:
347     return LowerFPToInt(MI, DL, BB, TII, true, false, false,
348                         WebAssembly::I32_TRUNC_U_F32);
349   case WebAssembly::FP_TO_SINT_I64_F32:
350     return LowerFPToInt(MI, DL, BB, TII, false, true, false,
351                         WebAssembly::I64_TRUNC_S_F32);
352   case WebAssembly::FP_TO_UINT_I64_F32:
353     return LowerFPToInt(MI, DL, BB, TII, true, true, false,
354                         WebAssembly::I64_TRUNC_U_F32);
355   case WebAssembly::FP_TO_SINT_I32_F64:
356     return LowerFPToInt(MI, DL, BB, TII, false, false, true,
357                         WebAssembly::I32_TRUNC_S_F64);
358   case WebAssembly::FP_TO_UINT_I32_F64:
359     return LowerFPToInt(MI, DL, BB, TII, true, false, true,
360                         WebAssembly::I32_TRUNC_U_F64);
361   case WebAssembly::FP_TO_SINT_I64_F64:
362     return LowerFPToInt(MI, DL, BB, TII, false, true, true,
363                         WebAssembly::I64_TRUNC_S_F64);
364   case WebAssembly::FP_TO_UINT_I64_F64:
365     return LowerFPToInt(MI, DL, BB, TII, true, true, true,
366                         WebAssembly::I64_TRUNC_U_F64);
367   llvm_unreachable("Unexpected instruction to emit with custom inserter");
368   }
369 }
370 
/// Return a human-readable name for the given WebAssembly ISD node opcode,
/// for use in DAG dumps. The case bodies are stamped out from
/// WebAssemblyISD.def; unknown opcodes yield nullptr.
const char *WebAssemblyTargetLowering::getTargetNodeName(
    unsigned Opcode) const {
  switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) {
    case WebAssemblyISD::FIRST_NUMBER:
      break;
#define HANDLE_NODETYPE(NODE) \
  case WebAssemblyISD::NODE:  \
    return "WebAssemblyISD::" #NODE;
#include "WebAssemblyISD.def"
#undef HANDLE_NODETYPE
  }
  return nullptr;
}
384 
385 std::pair<unsigned, const TargetRegisterClass *>
386 WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
387     const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
388   // First, see if this is a constraint that directly corresponds to a
389   // WebAssembly register class.
390   if (Constraint.size() == 1) {
391     switch (Constraint[0]) {
392       case 'r':
393         assert(VT != MVT::iPTR && "Pointer MVT not expected here");
394         if (Subtarget->hasSIMD128() && VT.isVector()) {
395           if (VT.getSizeInBits() == 128)
396             return std::make_pair(0U, &WebAssembly::V128RegClass);
397         }
398         if (VT.isInteger() && !VT.isVector()) {
399           if (VT.getSizeInBits() <= 32)
400             return std::make_pair(0U, &WebAssembly::I32RegClass);
401           if (VT.getSizeInBits() <= 64)
402             return std::make_pair(0U, &WebAssembly::I64RegClass);
403         }
404         break;
405       default:
406         break;
407     }
408   }
409 
410   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
411 }
412 
/// Allow hoisting cttz out of conditional code.
bool WebAssemblyTargetLowering::isCheapToSpeculateCttz() const {
  // Assume ctz is a relatively cheap operation.
  return true;
}
417 
/// Allow hoisting ctlz out of conditional code.
bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const {
  // Assume clz is a relatively cheap operation.
  return true;
}
422 
423 bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
424                                                       const AddrMode &AM,
425                                                       Type *Ty,
426                                                       unsigned AS,
427                                                       Instruction *I) const {
428   // WebAssembly offsets are added as unsigned without wrapping. The
429   // isLegalAddressingMode gives us no way to determine if wrapping could be
430   // happening, so we approximate this by accepting only non-negative offsets.
431   if (AM.BaseOffs < 0) return false;
432 
433   // WebAssembly has no scale register operands.
434   if (AM.Scale != 0) return false;
435 
436   // Everything else is legal.
437   return true;
438 }
439 
440 bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
441     EVT /*VT*/, unsigned /*AddrSpace*/, unsigned /*Align*/, bool *Fast) const {
442   // WebAssembly supports unaligned accesses, though it should be declared
443   // with the p2align attribute on loads and stores which do so, and there
444   // may be a performance impact. We tell LLVM they're "fast" because
445   // for the kinds of things that LLVM uses this for (merging adjacent stores
446   // of constants, etc.), WebAssembly implementations will either want the
447   // unaligned access or they'll split anyway.
448   if (Fast) *Fast = true;
449   return true;
450 }
451 
/// Prefer emitting actual division instructions over multiply-by-reciprocal
/// expansions.
bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
                                              AttributeList Attr) const {
  // The current thinking is that wasm engines will perform this optimization,
  // so we can save on code size.
  return true;
}
458 
459 EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
460                                                   LLVMContext &C,
461                                                   EVT VT) const {
462   if (VT.isVector())
463     return VT.changeVectorElementTypeToInteger();
464 
465   return TargetLowering::getSetCCResultType(DL, C, VT);
466 }
467 
468 bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
469                                                    const CallInst &I,
470                                                    MachineFunction &MF,
471                                                    unsigned Intrinsic) const {
472   switch (Intrinsic) {
473   case Intrinsic::wasm_atomic_notify:
474     Info.opc = ISD::INTRINSIC_W_CHAIN;
475     Info.memVT = MVT::i32;
476     Info.ptrVal = I.getArgOperand(0);
477     Info.offset = 0;
478     Info.align = 4;
479     // atomic.notify instruction does not really load the memory specified with
480     // this argument, but MachineMemOperand should either be load or store, so
481     // we set this to a load.
482     // FIXME Volatile isn't really correct, but currently all LLVM atomic
483     // instructions are treated as volatiles in the backend, so we should be
484     // consistent. The same applies for wasm_atomic_wait intrinsics too.
485     Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
486     return true;
487   case Intrinsic::wasm_atomic_wait_i32:
488     Info.opc = ISD::INTRINSIC_W_CHAIN;
489     Info.memVT = MVT::i32;
490     Info.ptrVal = I.getArgOperand(0);
491     Info.offset = 0;
492     Info.align = 4;
493     Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
494     return true;
495   case Intrinsic::wasm_atomic_wait_i64:
496     Info.opc = ISD::INTRINSIC_W_CHAIN;
497     Info.memVT = MVT::i64;
498     Info.ptrVal = I.getArgOperand(0);
499     Info.offset = 0;
500     Info.align = 8;
501     Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
502     return true;
503   default:
504     return false;
505   }
506 }
507 
508 //===----------------------------------------------------------------------===//
509 // WebAssembly Lowering private implementation.
510 //===----------------------------------------------------------------------===//
511 
512 //===----------------------------------------------------------------------===//
513 // Lowering Code
514 //===----------------------------------------------------------------------===//
515 
// Report an unsupported-feature diagnostic for the current function at the
// given location. Note this raises a diagnostic rather than aborting, so
// lowering continues after the call.
static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *msg) {
  MachineFunction &MF = DAG.getMachineFunction();
  DAG.getContext()->diagnose(
      DiagnosticInfoUnsupported(MF.getFunction(), msg, DL.getDebugLoc()));
}
521 
522 // Test whether the given calling convention is supported.
523 static bool CallingConvSupported(CallingConv::ID CallConv) {
524   // We currently support the language-independent target-independent
525   // conventions. We don't yet have a way to annotate calls with properties like
526   // "cold", and we don't have any call-clobbered registers, so these are mostly
527   // all handled the same.
528   return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
529          CallConv == CallingConv::Cold ||
530          CallConv == CallingConv::PreserveMost ||
531          CallConv == CallingConv::PreserveAll ||
532          CallConv == CallingConv::CXX_FAST_TLS;
533 }
534 
/// Lower an outgoing call to a WebAssembly CALL0/CALL1 node.
///
/// Diagnoses unsupported conventions and argument attributes, copies byval
/// arguments into caller-allocated stack objects, spills non-fixed (vararg)
/// arguments into a stack buffer whose address is appended as an extra
/// operand, and finally builds the call node, pushing the (at most one)
/// returned value into InVals.
SDValue WebAssemblyTargetLowering::LowerCall(
    CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc DL = CLI.DL;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  MachineFunction &MF = DAG.getMachineFunction();
  auto Layout = MF.getDataLayout();

  CallingConv::ID CallConv = CLI.CallConv;
  if (!CallingConvSupported(CallConv))
    fail(DL, DAG,
         "WebAssembly doesn't support language-specific or target-specific "
         "calling conventions yet");
  if (CLI.IsPatchPoint)
    fail(DL, DAG, "WebAssembly doesn't support patch point yet");

  // WebAssembly doesn't currently support explicit tail calls. If they are
  // required, fail. Otherwise, just disable them.
  if ((CallConv == CallingConv::Fast && CLI.IsTailCall &&
       MF.getTarget().Options.GuaranteedTailCallOpt) ||
      (CLI.CS && CLI.CS.isMustTailCall()))
    fail(DL, DAG, "WebAssembly doesn't support tail call yet");
  CLI.IsTailCall = false;

  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  if (Ins.size() > 1)
    fail(DL, DAG, "WebAssembly doesn't support more than 1 returned value yet");

  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  unsigned NumFixedArgs = 0;
  for (unsigned i = 0; i < Outs.size(); ++i) {
    const ISD::OutputArg &Out = Outs[i];
    SDValue &OutVal = OutVals[i];
    if (Out.Flags.isNest())
      fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
    if (Out.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
    if (Out.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
    if (Out.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
    // byval arguments are passed as a pointer to a caller-owned copy: create
    // a stack object, memcpy the value into it, and pass its frame index.
    if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
      auto &MFI = MF.getFrameInfo();
      int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
                                     Out.Flags.getByValAlign(),
                                     /*isSS=*/false);
      SDValue SizeNode =
          DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
      SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
      Chain = DAG.getMemcpy(
          Chain, DL, FINode, OutVal, SizeNode, Out.Flags.getByValAlign(),
          /*isVolatile*/ false, /*AlwaysInline=*/false,
          /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
      OutVal = FINode;
    }
    // Count the number of fixed args *after* legalization.
    NumFixedArgs += Out.IsFixed;
  }

  bool IsVarArg = CLI.IsVarArg;
  auto PtrVT = getPointerTy(Layout);

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (IsVarArg) {
    // Outgoing non-fixed arguments are placed in a buffer. First
    // compute their offsets and the total amount of buffer space needed.
    for (SDValue Arg :
         make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) {
      EVT VT = Arg.getValueType();
      assert(VT != MVT::iPTR && "Legalized args should be concrete");
      Type *Ty = VT.getTypeForEVT(*DAG.getContext());
      unsigned Offset = CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty),
                                             Layout.getABITypeAlignment(Ty));
      CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
                                        Offset, VT.getSimpleVT(),
                                        CCValAssign::Full));
    }
  }

  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();

  SDValue FINode;
  if (IsVarArg && NumBytes) {
    // For non-fixed arguments, next emit stores to store the argument values
    // to the stack buffer at the offsets computed above.
    int FI = MF.getFrameInfo().CreateStackObject(NumBytes,
                                                 Layout.getStackAlignment(),
                                                 /*isSS=*/false);
    unsigned ValNo = 0;
    SmallVector<SDValue, 8> Chains;
    for (SDValue Arg :
         make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) {
      assert(ArgLocs[ValNo].getValNo() == ValNo &&
             "ArgLocs should remain in order and only hold varargs args");
      unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
      FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
      SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
                                DAG.getConstant(Offset, DL, PtrVT));
      Chains.push_back(DAG.getStore(
          Chain, DL, Arg, Add,
          MachinePointerInfo::getFixedStack(MF, FI, Offset), 0));
    }
    if (!Chains.empty())
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  } else if (IsVarArg) {
    // Vararg call with an empty buffer: pass a null-ish buffer pointer.
    FINode = DAG.getIntPtrConstant(0, DL);
  }

  // Compute the operands for the CALLn node.
  SmallVector<SDValue, 16> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
  // isn't reliable.
  Ops.append(OutVals.begin(),
             IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
  // Add a pointer to the vararg buffer.
  if (IsVarArg) Ops.push_back(FINode);

  SmallVector<EVT, 8> InTys;
  for (const auto &In : Ins) {
    assert(!In.Flags.isByVal() && "byval is not valid for return values");
    assert(!In.Flags.isNest() && "nest is not valid for return values");
    if (In.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
    if (In.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
    if (In.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG,
           "WebAssembly hasn't implemented cons regs last return values");
    // Ignore In.getOrigAlign() because all our arguments are passed in
    // registers.
    InTys.push_back(In.VT);
  }
  InTys.push_back(MVT::Other);
  SDVTList InTyList = DAG.getVTList(InTys);
  // CALL0 produces no value (just a chain); CALL1 produces one value plus
  // the chain.
  SDValue Res =
      DAG.getNode(Ins.empty() ? WebAssemblyISD::CALL0 : WebAssemblyISD::CALL1,
                  DL, InTyList, Ops);
  if (Ins.empty()) {
    Chain = Res;
  } else {
    InVals.push_back(Res);
    Chain = Res.getValue(1);
  }

  return Chain;
}
689 
/// Report whether the given return signature can be lowered directly, or
/// must go through sret demotion.
bool WebAssemblyTargetLowering::CanLowerReturn(
    CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    LLVMContext & /*Context*/) const {
  // WebAssembly can't currently handle returning tuples.
  return Outs.size() <= 1;
}
697 
698 SDValue WebAssemblyTargetLowering::LowerReturn(
699     SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
700     const SmallVectorImpl<ISD::OutputArg> &Outs,
701     const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
702     SelectionDAG &DAG) const {
703   assert(Outs.size() <= 1 && "WebAssembly can only return up to one value");
704   if (!CallingConvSupported(CallConv))
705     fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
706 
707   SmallVector<SDValue, 4> RetOps(1, Chain);
708   RetOps.append(OutVals.begin(), OutVals.end());
709   Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);
710 
711   // Record the number and types of the return values.
712   for (const ISD::OutputArg &Out : Outs) {
713     assert(!Out.Flags.isByVal() && "byval is not valid for return values");
714     assert(!Out.Flags.isNest() && "nest is not valid for return values");
715     assert(Out.IsFixed && "non-fixed return value is not valid");
716     if (Out.Flags.isInAlloca())
717       fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
718     if (Out.Flags.isInConsecutiveRegs())
719       fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
720     if (Out.Flags.isInConsecutiveRegsLast())
721       fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
722   }
723 
724   return Chain;
725 }
726 
/// Lower the incoming (formal) arguments: each used argument becomes a
/// WebAssemblyISD::ARGUMENT node indexed by its position, and the function's
/// parameter and result types are recorded on the WebAssemblyFunctionInfo.
SDValue WebAssemblyTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  if (!CallingConvSupported(CallConv))
    fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");

  MachineFunction &MF = DAG.getMachineFunction();
  auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();

  // Set up the incoming ARGUMENTS value, which serves to represent the liveness
  // of the incoming values before they're represented by virtual registers.
  MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);

  for (const ISD::InputArg &In : Ins) {
    // Diagnose argument attributes that aren't implemented yet.
    if (In.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
    if (In.Flags.isNest())
      fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
    if (In.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
    if (In.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
    // Ignore In.getOrigAlign() because all our arguments are passed in
    // registers.
    // Note: InVals.size() is evaluated before push_back inserts, so it is the
    // index of the argument being added here. Unused arguments become UNDEF
    // rather than materializing an ARGUMENT node.
    InVals.push_back(
        In.Used
            ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
                          DAG.getTargetConstant(InVals.size(), DL, MVT::i32))
            : DAG.getUNDEF(In.VT));

    // Record the number and types of arguments.
    MFI->addParam(In.VT);
  }

  // Varargs are copied into a buffer allocated by the caller, and a pointer to
  // the buffer is passed as an argument.
  if (IsVarArg) {
    MVT PtrVT = getPointerTy(MF.getDataLayout());
    unsigned VarargVreg =
        MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrVT));
    MFI->setVarargBufferVreg(VarargVreg);
    // The buffer pointer is an extra trailing argument, so its ARGUMENT index
    // is Ins.size() — one past the last fixed argument recorded above.
    Chain = DAG.getCopyToReg(
        Chain, DL, VarargVreg,
        DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
                    DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
    MFI->addParam(PtrVT);
  }

  // Record the number and types of results.
  // Params is required by ComputeSignatureVTs' interface but is not used
  // here; the parameter types were already recorded in the loop above.
  SmallVector<MVT, 4> Params;
  SmallVector<MVT, 4> Results;
  ComputeSignatureVTs(MF.getFunction(), DAG.getTarget(), Params, Results);
  for (MVT VT : Results)
    MFI->addResult(VT);

  return Chain;
}
785 
786 //===----------------------------------------------------------------------===//
787 //  Custom lowering hooks.
788 //===----------------------------------------------------------------------===//
789 
790 SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
791                                                   SelectionDAG &DAG) const {
792   SDLoc DL(Op);
793   switch (Op.getOpcode()) {
794     default:
795       llvm_unreachable("unimplemented operation lowering");
796       return SDValue();
797     case ISD::FrameIndex:
798       return LowerFrameIndex(Op, DAG);
799     case ISD::GlobalAddress:
800       return LowerGlobalAddress(Op, DAG);
801     case ISD::ExternalSymbol:
802       return LowerExternalSymbol(Op, DAG);
803     case ISD::JumpTable:
804       return LowerJumpTable(Op, DAG);
805     case ISD::BR_JT:
806       return LowerBR_JT(Op, DAG);
807     case ISD::VASTART:
808       return LowerVASTART(Op, DAG);
809     case ISD::BlockAddress:
810     case ISD::BRIND:
811       fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
812       return SDValue();
813     case ISD::RETURNADDR: // Probably nothing meaningful can be returned here.
814       fail(DL, DAG, "WebAssembly hasn't implemented __builtin_return_address");
815       return SDValue();
816     case ISD::FRAMEADDR:
817       return LowerFRAMEADDR(Op, DAG);
818     case ISD::CopyToReg:
819       return LowerCopyToReg(Op, DAG);
820     case ISD::INTRINSIC_WO_CHAIN:
821       return LowerINTRINSIC_WO_CHAIN(Op, DAG);
822   }
823 }
824 
/// Custom-lower CopyToReg nodes whose source operand is a FrameIndex; all
/// other CopyToReg nodes are left to the default handling (SDValue() return).
SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDValue Src = Op.getOperand(2);
  if (isa<FrameIndexSDNode>(Src.getNode())) {
    // CopyToReg nodes don't support FrameIndex operands. Other targets select
    // the FI to some LEA-like instruction, but since we don't have that, we
    // need to insert some kind of instruction that can take an FI operand and
    // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
    // copy_local between Op and its FI operand.
    SDValue Chain = Op.getOperand(0);
    SDLoc DL(Op);
    unsigned Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
    EVT VT = Src.getValueType();
    // A FrameIndex is pointer-sized, so it's presumably i32 here (or i64 on
    // wasm64) — the ternary below only distinguishes those two cases.
    SDValue Copy(
        DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
                                          : WebAssembly::COPY_I64,
                           DL, VT, Src),
        0);
    // Rebuild the CopyToReg on top of the new copy. If the original node
    // produced a glue result (more than one value), preserve its optional
    // incoming glue operand (operand 3, when present).
    return Op.getNode()->getNumValues() == 1
               ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
               : DAG.getCopyToReg(Chain, DL, Reg, Copy, Op.getNumOperands() == 4
                                                            ? Op.getOperand(3)
                                                            : SDValue());
  }
  return SDValue();
}
851 
852 SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
853                                                    SelectionDAG &DAG) const {
854   int FI = cast<FrameIndexSDNode>(Op)->getIndex();
855   return DAG.getTargetFrameIndex(FI, Op.getValueType());
856 }
857 
858 SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
859                                                   SelectionDAG &DAG) const {
860   // Non-zero depths are not supported by WebAssembly currently. Use the
861   // legalizer's default expansion, which is to return 0 (what this function is
862   // documented to do).
863   if (Op.getConstantOperandVal(0) > 0)
864     return SDValue();
865 
866   DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
867   EVT VT = Op.getValueType();
868   unsigned FP =
869       Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
870   return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
871 }
872 
873 SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
874                                                       SelectionDAG &DAG) const {
875   SDLoc DL(Op);
876   const auto *GA = cast<GlobalAddressSDNode>(Op);
877   EVT VT = Op.getValueType();
878   assert(GA->getTargetFlags() == 0 &&
879          "Unexpected target flags on generic GlobalAddressSDNode");
880   if (GA->getAddressSpace() != 0)
881     fail(DL, DAG, "WebAssembly only expects the 0 address space");
882   return DAG.getNode(
883       WebAssemblyISD::Wrapper, DL, VT,
884       DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset()));
885 }
886 
887 SDValue WebAssemblyTargetLowering::LowerExternalSymbol(
888     SDValue Op, SelectionDAG &DAG) const {
889   SDLoc DL(Op);
890   const auto *ES = cast<ExternalSymbolSDNode>(Op);
891   EVT VT = Op.getValueType();
892   assert(ES->getTargetFlags() == 0 &&
893          "Unexpected target flags on generic ExternalSymbolSDNode");
894   // Set the TargetFlags to 0x1 which indicates that this is a "function"
895   // symbol rather than a data symbol. We do this unconditionally even though
896   // we don't know anything about the symbol other than its name, because all
897   // external symbols used in target-independent SelectionDAG code are for
898   // functions.
899   return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
900                      DAG.getTargetExternalSymbol(ES->getSymbol(), VT,
901                                                  WebAssemblyII::MO_SYMBOL_FUNCTION));
902 }
903 
904 SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
905                                                   SelectionDAG &DAG) const {
906   // There's no need for a Wrapper node because we always incorporate a jump
907   // table operand into a BR_TABLE instruction, rather than ever
908   // materializing it in a register.
909   const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
910   return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
911                                 JT->getTargetFlags());
912 }
913 
914 SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
915                                               SelectionDAG &DAG) const {
916   SDLoc DL(Op);
917   SDValue Chain = Op.getOperand(0);
918   const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
919   SDValue Index = Op.getOperand(2);
920   assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
921 
922   SmallVector<SDValue, 8> Ops;
923   Ops.push_back(Chain);
924   Ops.push_back(Index);
925 
926   MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
927   const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
928 
929   // Add an operand for each case.
930   for (auto MBB : MBBs) Ops.push_back(DAG.getBasicBlock(MBB));
931 
932   // TODO: For now, we just pick something arbitrary for a default case for now.
933   // We really want to sniff out the guard and put in the real default case (and
934   // delete the guard).
935   Ops.push_back(DAG.getBasicBlock(MBBs[0]));
936 
937   return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
938 }
939 
940 SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
941                                                 SelectionDAG &DAG) const {
942   SDLoc DL(Op);
943   EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());
944 
945   auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
946   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
947 
948   SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
949                                     MFI->getVarargBufferVreg(), PtrVT);
950   return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
951                       MachinePointerInfo(SV), 0);
952 }
953 
954 SDValue
955 WebAssemblyTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
956                                                    SelectionDAG &DAG) const {
957   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
958   SDLoc DL(Op);
959   switch (IntNo) {
960   default:
961     return {}; // Don't custom lower most intrinsics.
962 
963   case Intrinsic::wasm_lsda:
964     // TODO For now, just return 0 not to crash
965     return DAG.getConstant(0, DL, Op.getValueType());
966   }
967 }
968 
969 //===----------------------------------------------------------------------===//
970 //                          WebAssembly Optimization Hooks
971 //===----------------------------------------------------------------------===//
972