1 //=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// This file implements the WebAssemblyTargetLowering class.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "WebAssemblyISelLowering.h"
16 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
17 #include "WebAssemblyMachineFunctionInfo.h"
18 #include "WebAssemblySubtarget.h"
19 #include "WebAssemblyTargetMachine.h"
20 #include "llvm/CodeGen/Analysis.h"
21 #include "llvm/CodeGen/CallingConvLower.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineJumpTableInfo.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/CodeGen/SelectionDAG.h"
26 #include "llvm/IR/DiagnosticInfo.h"
27 #include "llvm/IR/DiagnosticPrinter.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/IR/Intrinsics.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/Support/raw_ostream.h"
33 #include "llvm/Target/TargetOptions.h"
34 using namespace llvm;
35 
36 #define DEBUG_TYPE "wasm-lower"
37 
// Emit proposed instructions that may not have been implemented in engines
// yet. Off by default; gated behind -wasm-enable-unimplemented-simd so that
// generated code stays runnable on current engines unless explicitly opted in.
cl::opt<bool> EnableUnimplementedWasmSIMDInstrs(
    "wasm-enable-unimplemented-simd",
    cl::desc("Emit potentially-unimplemented WebAssembly SIMD instructions"),
    cl::init(false));
43 
// Configure WebAssembly-specific lowering: register classes, which operations
// are Legal/Custom/Expand, condition-code handling, and load/store extension
// rules. This runs once per subtarget at TargetLowering construction time.
WebAssemblyTargetLowering::WebAssemblyTargetLowering(
    const TargetMachine &TM, const WebAssemblySubtarget &STI)
    : TargetLowering(TM), Subtarget(&STI) {
  // Pointer-sized integer type: i64 under wasm64, i32 under wasm32.
  auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;

  // Booleans always contain 0 or 1.
  setBooleanContents(ZeroOrOneBooleanContent);
  // WebAssembly does not produce floating-point exceptions on normal floating
  // point operations.
  setHasFloatingPointExceptions(false);
  // We don't know the microarchitecture here, so just reduce register pressure.
  setSchedulingPreference(Sched::RegPressure);
  // Tell ISel that we have a stack pointer.
  setStackPointerRegisterToSaveRestore(
      Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
  // Set up the register classes.
  addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
  addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
  addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
  addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
  if (Subtarget->hasSIMD128()) {
    // All 128-bit vector types share the single V128 register class.
    addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
    if (EnableUnimplementedWasmSIMDInstrs) {
      // 64x2 types are only enabled behind the unimplemented-SIMD flag.
      addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
      addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
    }
  }
  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget->getRegisterInfo());

  // Addresses and jumps all go through custom lowering (see LowerOperation).
  setOperationAction(ISD::GlobalAddress, MVTPtr, Custom);
  setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom);
  setOperationAction(ISD::JumpTable, MVTPtr, Custom);
  setOperationAction(ISD::BlockAddress, MVTPtr, Custom);
  setOperationAction(ISD::BRIND, MVT::Other, Custom);

  // Take the default expansion for va_arg, va_copy, and va_end. There is no
  // default action for va_start, so we do that custom.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  for (auto T : {MVT::f32, MVT::f64}) {
    // Don't expand the floating-point types to constant pools.
    setOperationAction(ISD::ConstantFP, T, Legal);
    // Expand floating-point comparisons.
    for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
                    ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
      setCondCodeAction(CC, T, Expand);
    // Expand floating-point library function operators.
    for (auto Op : {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM,
                    ISD::FMA})
      setOperationAction(Op, T, Expand);
    // Note supported floating-point library function operators that otherwise
    // default to expand.
    for (auto Op :
         {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT})
      setOperationAction(Op, T, Legal);
    // Support minnan and maxnan, which otherwise default to expand.
    setOperationAction(ISD::FMINNAN, T, Legal);
    setOperationAction(ISD::FMAXNAN, T, Legal);
    // WebAssembly currently has no builtin f16 support.
    setOperationAction(ISD::FP16_TO_FP, T, Expand);
    setOperationAction(ISD::FP_TO_FP16, T, Expand);
    setLoadExtAction(ISD::EXTLOAD, T, MVT::f16, Expand);
    setTruncStoreAction(T, MVT::f16, Expand);
  }

  for (auto T : {MVT::i32, MVT::i64}) {
    // Expand unavailable integer operations.
    for (auto Op :
         {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI,
          ISD::MULHS, ISD::MULHU, ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS,
          ISD::SRA_PARTS, ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC,
          ISD::SUBE}) {
      setOperationAction(Op, T, Expand);
    }
  }

  // There is no i64x2.mul instruction
  setOperationAction(ISD::MUL, MVT::v2i64, Expand);

  // As a special case, these operators use the type to mean the type to
  // sign-extend from.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  if (!Subtarget->hasSignExt()) {
    // Without the sign-ext feature there are no i8/i16/i32 extend-in-register
    // instructions, so expand those too.
    for (auto T : {MVT::i8, MVT::i16, MVT::i32})
      setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand);
  }

  // Dynamic stack allocation: use the default expansion.
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand);

  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
  setOperationAction(ISD::CopyToReg, MVT::Other, Custom);

  // Expand these forms; we pattern-match the forms that we can handle in isel.
  for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
    for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
      setOperationAction(Op, T, Expand);

  // We have custom switch handling.
  setOperationAction(ISD::BR_JT, MVT::Other, Custom);

  // WebAssembly doesn't have:
  //  - Floating-point extending loads.
  //  - Floating-point truncating stores.
  //  - i1 extending loads.
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  for (auto T : MVT::integer_valuetypes())
    for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
      setLoadExtAction(Ext, T, MVT::i1, Promote);

  // Trap lowers to wasm unreachable
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // Exception handling intrinsics
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setMaxAtomicSizeInBitsSupported(64);
}
172 
173 TargetLowering::AtomicExpansionKind
174 WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
175   // We have wasm instructions for these
176   switch (AI->getOperation()) {
177   case AtomicRMWInst::Add:
178   case AtomicRMWInst::Sub:
179   case AtomicRMWInst::And:
180   case AtomicRMWInst::Or:
181   case AtomicRMWInst::Xor:
182   case AtomicRMWInst::Xchg:
183     return AtomicExpansionKind::None;
184   default:
185     break;
186   }
187   return AtomicExpansionKind::CmpXChg;
188 }
189 
// Create the WebAssembly-specific FastISel instance by delegating to the
// backend's factory function.
FastISel *WebAssemblyTargetLowering::createFastISel(
    FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
  return WebAssembly::createFastISel(FuncInfo, LibInfo);
}
194 
// Report whether a constant offset can be folded into a global address
// reference. WebAssembly places no restriction on this, so unconditionally
// allow folding for every global.
bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode * /*GA*/) const {
  // All offsets can be folded.
  return true;
}
200 
201 MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
202                                                       EVT VT) const {
203   unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
204   if (BitWidth > 1 && BitWidth < 8) BitWidth = 8;
205 
206   if (BitWidth > 64) {
207     // The shift will be lowered to a libcall, and compiler-rt libcalls expect
208     // the count to be an i32.
209     BitWidth = 32;
210     assert(BitWidth >= Log2_32_Ceil(VT.getSizeInBits()) &&
211            "32-bit shift counts ought to be enough for anyone");
212   }
213 
214   MVT Result = MVT::getIntegerVT(BitWidth);
215   assert(Result != MVT::INVALID_SIMPLE_VALUE_TYPE &&
216          "Unable to represent scalar shift amount type");
217   return Result;
218 }
219 
// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
// undefined result on invalid/overflow, to the WebAssembly opcode, which
// traps on invalid/overflow.
//
// The pseudo instruction MI is expanded in place into a CFG diamond:
//   BB:       range-check the input, branching to TrueMBB when out of range
//   FalseMBB: input in range — emit the real (trapping) LoweredOpcode
//   TrueMBB:  input out of range — materialize a substitute constant
//   DoneMBB:  PHI the two results into OutReg
//
// The flags select the variant: IsUnsigned (unsigned conversion), Int64
// (64-bit integer result), Float64 (f64 input); LoweredOpcode is the real
// WebAssembly truncation instruction used on the in-range path.
static MachineBasicBlock *
LowerFPToInt(
    MachineInstr &MI,
    DebugLoc DL,
    MachineBasicBlock *BB,
    const TargetInstrInfo &TII,
    bool IsUnsigned,
    bool Int64,
    bool Float64,
    unsigned LoweredOpcode
) {
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();

  unsigned OutReg = MI.getOperand(0).getReg();
  unsigned InReg = MI.getOperand(1).getReg();

  // Pick the opcodes matching the source float width / dest int width.
  unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
  unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
  unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
  unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
  unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
  unsigned Eqz = WebAssembly::EQZ_I32;
  unsigned And = WebAssembly::AND_I32;
  // Substitute is the result produced for out-of-range inputs (matching the
  // behavior of the undefined-result LLVM operator on common targets):
  // INT_MIN for signed conversions, 0 for unsigned ones.
  int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
  int64_t Substitute = IsUnsigned ? 0 : Limit;
  // CmpVal is the exclusive upper bound of the representable range as a
  // double: 2^63 / 2^31 for signed, 2^64 / 2^32 for unsigned.
  double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
  auto &Context = BB->getParent()->getFunction().getContext();
  Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVM_BB);

  // Lay the blocks out as BB, FalseMBB, TrueMBB, DoneMBB so the common
  // (in-range) path falls through from BB.
  MachineFunction::iterator It = ++BB->getIterator();
  F->insert(It, FalseMBB);
  F->insert(It, TrueMBB);
  F->insert(It, DoneMBB);

  // Transfer the remainder of BB and its successor edges to DoneMBB.
  DoneMBB->splice(DoneMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)),
                  BB->end());
  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);

  BB->addSuccessor(TrueMBB);
  BB->addSuccessor(FalseMBB);
  TrueMBB->addSuccessor(DoneMBB);
  FalseMBB->addSuccessor(DoneMBB);

  unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
  Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
  Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
  CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
  TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));

  // Erase the pseudo first so the instructions built below are appended at
  // the (new) end of BB.
  MI.eraseFromParent();
  // For signed numbers, we can do a single comparison to determine whether
  // fabs(x) is within range.
  if (IsUnsigned) {
    Tmp0 = InReg;
  } else {
    BuildMI(BB, DL, TII.get(Abs), Tmp0)
        .addReg(InReg);
  }
  BuildMI(BB, DL, TII.get(FConst), Tmp1)
      .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
  BuildMI(BB, DL, TII.get(LT), CmpReg)
      .addReg(Tmp0)
      .addReg(Tmp1);

  // For unsigned numbers, we have to do a separate comparison with zero.
  if (IsUnsigned) {
    Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
    unsigned SecondCmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
    unsigned AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
    BuildMI(BB, DL, TII.get(FConst), Tmp1)
        .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
    BuildMI(BB, DL, TII.get(GE), SecondCmpReg)
        .addReg(Tmp0)
        .addReg(Tmp1);
    BuildMI(BB, DL, TII.get(And), AndReg)
        .addReg(CmpReg)
        .addReg(SecondCmpReg);
    CmpReg = AndReg;
  }

  // EqzReg is nonzero iff the input is OUT of range.
  BuildMI(BB, DL, TII.get(Eqz), EqzReg)
      .addReg(CmpReg);

  // Create the CFG diamond to select between doing the conversion or using
  // the substitute value.
  BuildMI(BB, DL, TII.get(WebAssembly::BR_IF))
      .addMBB(TrueMBB)
      .addReg(EqzReg);
  BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg)
      .addReg(InReg);
  BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR))
      .addMBB(DoneMBB);
  BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg)
      .addImm(Substitute);
  BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
      .addReg(FalseReg)
      .addMBB(FalseMBB)
      .addReg(TrueReg)
      .addMBB(TrueMBB);

  return DoneMBB;
}
335 
336 MachineBasicBlock *
337 WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
338     MachineInstr &MI,
339     MachineBasicBlock *BB
340 ) const {
341   const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
342   DebugLoc DL = MI.getDebugLoc();
343 
344   switch (MI.getOpcode()) {
345   default: llvm_unreachable("Unexpected instr type to insert");
346   case WebAssembly::FP_TO_SINT_I32_F32:
347     return LowerFPToInt(MI, DL, BB, TII, false, false, false,
348                         WebAssembly::I32_TRUNC_S_F32);
349   case WebAssembly::FP_TO_UINT_I32_F32:
350     return LowerFPToInt(MI, DL, BB, TII, true, false, false,
351                         WebAssembly::I32_TRUNC_U_F32);
352   case WebAssembly::FP_TO_SINT_I64_F32:
353     return LowerFPToInt(MI, DL, BB, TII, false, true, false,
354                         WebAssembly::I64_TRUNC_S_F32);
355   case WebAssembly::FP_TO_UINT_I64_F32:
356     return LowerFPToInt(MI, DL, BB, TII, true, true, false,
357                         WebAssembly::I64_TRUNC_U_F32);
358   case WebAssembly::FP_TO_SINT_I32_F64:
359     return LowerFPToInt(MI, DL, BB, TII, false, false, true,
360                         WebAssembly::I32_TRUNC_S_F64);
361   case WebAssembly::FP_TO_UINT_I32_F64:
362     return LowerFPToInt(MI, DL, BB, TII, true, false, true,
363                         WebAssembly::I32_TRUNC_U_F64);
364   case WebAssembly::FP_TO_SINT_I64_F64:
365     return LowerFPToInt(MI, DL, BB, TII, false, true, true,
366                         WebAssembly::I64_TRUNC_S_F64);
367   case WebAssembly::FP_TO_UINT_I64_F64:
368     return LowerFPToInt(MI, DL, BB, TII, true, true, true,
369                         WebAssembly::I64_TRUNC_U_F64);
370   llvm_unreachable("Unexpected instruction to emit with custom inserter");
371   }
372 }
373 
// Return the printable name of a WebAssembly-specific SelectionDAG node for
// debug dumps. Names are generated from the HANDLE_NODETYPE entries in
// WebAssemblyISD.def; any other opcode (including target-independent ones)
// yields nullptr.
const char *WebAssemblyTargetLowering::getTargetNodeName(
    unsigned Opcode) const {
  switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) {
    case WebAssemblyISD::FIRST_NUMBER:
      break;
#define HANDLE_NODETYPE(NODE) \
  case WebAssemblyISD::NODE:  \
    return "WebAssemblyISD::" #NODE;
#include "WebAssemblyISD.def"
#undef HANDLE_NODETYPE
  }
  return nullptr;
}
387 
388 std::pair<unsigned, const TargetRegisterClass *>
389 WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
390     const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
391   // First, see if this is a constraint that directly corresponds to a
392   // WebAssembly register class.
393   if (Constraint.size() == 1) {
394     switch (Constraint[0]) {
395       case 'r':
396         assert(VT != MVT::iPTR && "Pointer MVT not expected here");
397         if (Subtarget->hasSIMD128() && VT.isVector()) {
398           if (VT.getSizeInBits() == 128)
399             return std::make_pair(0U, &WebAssembly::V128RegClass);
400         }
401         if (VT.isInteger() && !VT.isVector()) {
402           if (VT.getSizeInBits() <= 32)
403             return std::make_pair(0U, &WebAssembly::I32RegClass);
404           if (VT.getSizeInBits() <= 64)
405             return std::make_pair(0U, &WebAssembly::I64RegClass);
406         }
407         break;
408       default:
409         break;
410     }
411   }
412 
413   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
414 }
415 
// Allow hoisting cttz above branches: assume ctz is a relatively cheap
// operation on WebAssembly targets.
bool WebAssemblyTargetLowering::isCheapToSpeculateCttz() const {
  // Assume ctz is a relatively cheap operation.
  return true;
}
420 
// Allow hoisting ctlz above branches: assume clz is a relatively cheap
// operation on WebAssembly targets.
bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const {
  // Assume clz is a relatively cheap operation.
  return true;
}
425 
426 bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
427                                                       const AddrMode &AM,
428                                                       Type *Ty,
429                                                       unsigned AS,
430                                                       Instruction *I) const {
431   // WebAssembly offsets are added as unsigned without wrapping. The
432   // isLegalAddressingMode gives us no way to determine if wrapping could be
433   // happening, so we approximate this by accepting only non-negative offsets.
434   if (AM.BaseOffs < 0) return false;
435 
436   // WebAssembly has no scale register operands.
437   if (AM.Scale != 0) return false;
438 
439   // Everything else is legal.
440   return true;
441 }
442 
// Report that misaligned accesses are allowed (and "fast") for every type and
// address space. The optional Fast out-parameter may be null.
bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
    EVT /*VT*/, unsigned /*AddrSpace*/, unsigned /*Align*/, bool *Fast) const {
  // WebAssembly supports unaligned accesses, though it should be declared
  // with the p2align attribute on loads and stores which do so, and there
  // may be a performance impact. We tell LLVM they're "fast" because
  // for the kinds of things that LLVM uses this for (merging adjacent stores
  // of constants, etc.), WebAssembly implementations will either want the
  // unaligned access or they'll split anyway.
  if (Fast) *Fast = true;
  return true;
}
454 
// Treat integer division as cheap so DAGCombine keeps div/rem instead of
// expanding them into longer multiply-based sequences.
bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
                                              AttributeList Attr) const {
  // The current thinking is that wasm engines will perform this optimization,
  // so we can save on code size.
  return true;
}
461 
462 EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
463                                                   LLVMContext &C,
464                                                   EVT VT) const {
465   if (VT.isVector())
466     return VT.changeVectorElementTypeToInteger();
467 
468   return TargetLowering::getSetCCResultType(DL, C, VT);
469 }
470 
471 bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
472                                                    const CallInst &I,
473                                                    MachineFunction &MF,
474                                                    unsigned Intrinsic) const {
475   switch (Intrinsic) {
476   case Intrinsic::wasm_atomic_notify:
477     Info.opc = ISD::INTRINSIC_W_CHAIN;
478     Info.memVT = MVT::i32;
479     Info.ptrVal = I.getArgOperand(0);
480     Info.offset = 0;
481     Info.align = 4;
482     // atomic.notify instruction does not really load the memory specified with
483     // this argument, but MachineMemOperand should either be load or store, so
484     // we set this to a load.
485     // FIXME Volatile isn't really correct, but currently all LLVM atomic
486     // instructions are treated as volatiles in the backend, so we should be
487     // consistent. The same applies for wasm_atomic_wait intrinsics too.
488     Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
489     return true;
490   case Intrinsic::wasm_atomic_wait_i32:
491     Info.opc = ISD::INTRINSIC_W_CHAIN;
492     Info.memVT = MVT::i32;
493     Info.ptrVal = I.getArgOperand(0);
494     Info.offset = 0;
495     Info.align = 4;
496     Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
497     return true;
498   case Intrinsic::wasm_atomic_wait_i64:
499     Info.opc = ISD::INTRINSIC_W_CHAIN;
500     Info.memVT = MVT::i64;
501     Info.ptrVal = I.getArgOperand(0);
502     Info.offset = 0;
503     Info.align = 8;
504     Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
505     return true;
506   default:
507     return false;
508   }
509 }
510 
511 //===----------------------------------------------------------------------===//
512 // WebAssembly Lowering private implementation.
513 //===----------------------------------------------------------------------===//
514 
515 //===----------------------------------------------------------------------===//
516 // Lowering Code
517 //===----------------------------------------------------------------------===//
518 
// Diagnose a construct this backend cannot lower yet: emit a
// DiagnosticInfoUnsupported for the current function at the given location.
static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *msg) {
  MachineFunction &MF = DAG.getMachineFunction();
  DAG.getContext()->diagnose(
      DiagnosticInfoUnsupported(MF.getFunction(), msg, DL.getDebugLoc()));
}
524 
525 // Test whether the given calling convention is supported.
526 static bool CallingConvSupported(CallingConv::ID CallConv) {
527   // We currently support the language-independent target-independent
528   // conventions. We don't yet have a way to annotate calls with properties like
529   // "cold", and we don't have any call-clobbered registers, so these are mostly
530   // all handled the same.
531   return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
532          CallConv == CallingConv::Cold ||
533          CallConv == CallingConv::PreserveMost ||
534          CallConv == CallingConv::PreserveAll ||
535          CallConv == CallingConv::CXX_FAST_TLS;
536 }
537 
// Lower an outgoing call to a WebAssemblyISD::CALL0/CALL1 node. Byval
// arguments are copied into caller-allocated stack objects, and for varargs
// calls all non-fixed arguments are spilled to a stack buffer whose address
// is appended as a trailing operand. Diagnoses (via fail) the call shapes
// WebAssembly cannot handle yet instead of asserting.
SDValue WebAssemblyTargetLowering::LowerCall(
    CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc DL = CLI.DL;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  MachineFunction &MF = DAG.getMachineFunction();
  auto Layout = MF.getDataLayout();

  CallingConv::ID CallConv = CLI.CallConv;
  if (!CallingConvSupported(CallConv))
    fail(DL, DAG,
         "WebAssembly doesn't support language-specific or target-specific "
         "calling conventions yet");
  if (CLI.IsPatchPoint)
    fail(DL, DAG, "WebAssembly doesn't support patch point yet");

  // WebAssembly doesn't currently support explicit tail calls. If they are
  // required, fail. Otherwise, just disable them.
  if ((CallConv == CallingConv::Fast && CLI.IsTailCall &&
       MF.getTarget().Options.GuaranteedTailCallOpt) ||
      (CLI.CS && CLI.CS.isMustTailCall()))
    fail(DL, DAG, "WebAssembly doesn't support tail call yet");
  CLI.IsTailCall = false;

  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  if (Ins.size() > 1)
    fail(DL, DAG, "WebAssembly doesn't support more than 1 returned value yet");

  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  unsigned NumFixedArgs = 0;
  for (unsigned i = 0; i < Outs.size(); ++i) {
    const ISD::OutputArg &Out = Outs[i];
    SDValue &OutVal = OutVals[i];
    if (Out.Flags.isNest())
      fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
    if (Out.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
    if (Out.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
    if (Out.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
    if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
      // Byval: materialize a caller-side copy in a fresh stack object and
      // pass the copy's address instead of the original value.
      auto &MFI = MF.getFrameInfo();
      int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
                                     Out.Flags.getByValAlign(),
                                     /*isSS=*/false);
      SDValue SizeNode =
          DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
      SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
      Chain = DAG.getMemcpy(
          Chain, DL, FINode, OutVal, SizeNode, Out.Flags.getByValAlign(),
          /*isVolatile*/ false, /*AlwaysInline=*/false,
          /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
      OutVal = FINode;
    }
    // Count the number of fixed args *after* legalization.
    NumFixedArgs += Out.IsFixed;
  }

  bool IsVarArg = CLI.IsVarArg;
  auto PtrVT = getPointerTy(Layout);

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (IsVarArg) {
    // Outgoing non-fixed arguments are placed in a buffer. First
    // compute their offsets and the total amount of buffer space needed.
    for (SDValue Arg :
         make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) {
      EVT VT = Arg.getValueType();
      assert(VT != MVT::iPTR && "Legalized args should be concrete");
      Type *Ty = VT.getTypeForEVT(*DAG.getContext());
      unsigned Offset = CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty),
                                             Layout.getABITypeAlignment(Ty));
      CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
                                        Offset, VT.getSimpleVT(),
                                        CCValAssign::Full));
    }
  }

  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();

  // FINode is the address of the vararg buffer (or a zero placeholder when
  // there are no vararg bytes to pass).
  SDValue FINode;
  if (IsVarArg && NumBytes) {
    // For non-fixed arguments, next emit stores to store the argument values
    // to the stack buffer at the offsets computed above.
    int FI = MF.getFrameInfo().CreateStackObject(NumBytes,
                                                 Layout.getStackAlignment(),
                                                 /*isSS=*/false);
    unsigned ValNo = 0;
    SmallVector<SDValue, 8> Chains;
    for (SDValue Arg :
         make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) {
      assert(ArgLocs[ValNo].getValNo() == ValNo &&
             "ArgLocs should remain in order and only hold varargs args");
      unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
      FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
      SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
                                DAG.getConstant(Offset, DL, PtrVT));
      Chains.push_back(DAG.getStore(
          Chain, DL, Arg, Add,
          MachinePointerInfo::getFixedStack(MF, FI, Offset), 0));
    }
    if (!Chains.empty())
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  } else if (IsVarArg) {
    FINode = DAG.getIntPtrConstant(0, DL);
  }

  // Compute the operands for the CALLn node.
  SmallVector<SDValue, 16> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
  // isn't reliable.
  Ops.append(OutVals.begin(),
             IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
  // Add a pointer to the vararg buffer.
  if (IsVarArg) Ops.push_back(FINode);

  SmallVector<EVT, 8> InTys;
  for (const auto &In : Ins) {
    assert(!In.Flags.isByVal() && "byval is not valid for return values");
    assert(!In.Flags.isNest() && "nest is not valid for return values");
    if (In.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
    if (In.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
    if (In.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG,
           "WebAssembly hasn't implemented cons regs last return values");
    // Ignore In.getOrigAlign() because all our arguments are passed in
    // registers.
    InTys.push_back(In.VT);
  }
  InTys.push_back(MVT::Other);
  SDVTList InTyList = DAG.getVTList(InTys);
  // CALL0 is used when the call produces no value; CALL1 when it produces
  // exactly one (the only supported case).
  SDValue Res =
      DAG.getNode(Ins.empty() ? WebAssemblyISD::CALL0 : WebAssemblyISD::CALL1,
                  DL, InTyList, Ops);
  if (Ins.empty()) {
    Chain = Res;
  } else {
    InVals.push_back(Res);
    Chain = Res.getValue(1);
  }

  return Chain;
}
692 
// Report whether this set of return values can be lowered directly, or
// whether the caller must demote the return to an sret parameter.
bool WebAssemblyTargetLowering::CanLowerReturn(
    CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    LLVMContext & /*Context*/) const {
  // WebAssembly can't currently handle returning tuples.
  return Outs.size() <= 1;
}
700 
// Lower a return (of at most one value — see CanLowerReturn) to a
// WebAssemblyISD::RETURN node carrying the outgoing values.
SDValue WebAssemblyTargetLowering::LowerReturn(
    SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
    SelectionDAG &DAG) const {
  assert(Outs.size() <= 1 && "WebAssembly can only return up to one value");
  if (!CallingConvSupported(CallConv))
    fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");

  SmallVector<SDValue, 4> RetOps(1, Chain);
  RetOps.append(OutVals.begin(), OutVals.end());
  Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);

  // Validate the return value flags, diagnosing the argument attributes that
  // aren't implemented yet.
  for (const ISD::OutputArg &Out : Outs) {
    assert(!Out.Flags.isByVal() && "byval is not valid for return values");
    assert(!Out.Flags.isNest() && "nest is not valid for return values");
    assert(Out.IsFixed && "non-fixed return value is not valid");
    if (Out.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
    if (Out.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
    if (Out.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
  }

  return Chain;
}
729 
730 SDValue WebAssemblyTargetLowering::LowerFormalArguments(
731     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
732     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
733     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
734   if (!CallingConvSupported(CallConv))
735     fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
736 
737   MachineFunction &MF = DAG.getMachineFunction();
738   auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();
739 
740   // Set up the incoming ARGUMENTS value, which serves to represent the liveness
741   // of the incoming values before they're represented by virtual registers.
742   MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);
743 
744   for (const ISD::InputArg &In : Ins) {
745     if (In.Flags.isInAlloca())
746       fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
747     if (In.Flags.isNest())
748       fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
749     if (In.Flags.isInConsecutiveRegs())
750       fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
751     if (In.Flags.isInConsecutiveRegsLast())
752       fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
753     // Ignore In.getOrigAlign() because all our arguments are passed in
754     // registers.
755     InVals.push_back(
756         In.Used
757             ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
758                           DAG.getTargetConstant(InVals.size(), DL, MVT::i32))
759             : DAG.getUNDEF(In.VT));
760 
761     // Record the number and types of arguments.
762     MFI->addParam(In.VT);
763   }
764 
765   // Varargs are copied into a buffer allocated by the caller, and a pointer to
766   // the buffer is passed as an argument.
767   if (IsVarArg) {
768     MVT PtrVT = getPointerTy(MF.getDataLayout());
769     unsigned VarargVreg =
770         MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrVT));
771     MFI->setVarargBufferVreg(VarargVreg);
772     Chain = DAG.getCopyToReg(
773         Chain, DL, VarargVreg,
774         DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
775                     DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
776     MFI->addParam(PtrVT);
777   }
778 
779   // Record the number and types of results.
780   SmallVector<MVT, 4> Params;
781   SmallVector<MVT, 4> Results;
782   ComputeSignatureVTs(MF.getFunction(), DAG.getTarget(), Params, Results);
783   for (MVT VT : Results)
784     MFI->addResult(VT);
785 
786   return Chain;
787 }
788 
789 //===----------------------------------------------------------------------===//
790 //  Custom lowering hooks.
791 //===----------------------------------------------------------------------===//
792 
793 SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
794                                                   SelectionDAG &DAG) const {
795   SDLoc DL(Op);
796   switch (Op.getOpcode()) {
797     default:
798       llvm_unreachable("unimplemented operation lowering");
799       return SDValue();
800     case ISD::FrameIndex:
801       return LowerFrameIndex(Op, DAG);
802     case ISD::GlobalAddress:
803       return LowerGlobalAddress(Op, DAG);
804     case ISD::ExternalSymbol:
805       return LowerExternalSymbol(Op, DAG);
806     case ISD::JumpTable:
807       return LowerJumpTable(Op, DAG);
808     case ISD::BR_JT:
809       return LowerBR_JT(Op, DAG);
810     case ISD::VASTART:
811       return LowerVASTART(Op, DAG);
812     case ISD::BlockAddress:
813     case ISD::BRIND:
814       fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
815       return SDValue();
816     case ISD::RETURNADDR: // Probably nothing meaningful can be returned here.
817       fail(DL, DAG, "WebAssembly hasn't implemented __builtin_return_address");
818       return SDValue();
819     case ISD::FRAMEADDR:
820       return LowerFRAMEADDR(Op, DAG);
821     case ISD::CopyToReg:
822       return LowerCopyToReg(Op, DAG);
823     case ISD::INTRINSIC_WO_CHAIN:
824       return LowerINTRINSIC_WO_CHAIN(Op, DAG);
825   }
826 }
827 
828 SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
829                                                   SelectionDAG &DAG) const {
830   SDValue Src = Op.getOperand(2);
831   if (isa<FrameIndexSDNode>(Src.getNode())) {
832     // CopyToReg nodes don't support FrameIndex operands. Other targets select
833     // the FI to some LEA-like instruction, but since we don't have that, we
834     // need to insert some kind of instruction that can take an FI operand and
835     // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
836     // copy_local between Op and its FI operand.
837     SDValue Chain = Op.getOperand(0);
838     SDLoc DL(Op);
839     unsigned Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
840     EVT VT = Src.getValueType();
841     SDValue Copy(
842         DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
843                                           : WebAssembly::COPY_I64,
844                            DL, VT, Src),
845         0);
846     return Op.getNode()->getNumValues() == 1
847                ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
848                : DAG.getCopyToReg(Chain, DL, Reg, Copy, Op.getNumOperands() == 4
849                                                             ? Op.getOperand(3)
850                                                             : SDValue());
851   }
852   return SDValue();
853 }
854 
855 SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
856                                                    SelectionDAG &DAG) const {
857   int FI = cast<FrameIndexSDNode>(Op)->getIndex();
858   return DAG.getTargetFrameIndex(FI, Op.getValueType());
859 }
860 
861 SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
862                                                   SelectionDAG &DAG) const {
863   // Non-zero depths are not supported by WebAssembly currently. Use the
864   // legalizer's default expansion, which is to return 0 (what this function is
865   // documented to do).
866   if (Op.getConstantOperandVal(0) > 0)
867     return SDValue();
868 
869   DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
870   EVT VT = Op.getValueType();
871   unsigned FP =
872       Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
873   return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
874 }
875 
876 SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
877                                                       SelectionDAG &DAG) const {
878   SDLoc DL(Op);
879   const auto *GA = cast<GlobalAddressSDNode>(Op);
880   EVT VT = Op.getValueType();
881   assert(GA->getTargetFlags() == 0 &&
882          "Unexpected target flags on generic GlobalAddressSDNode");
883   if (GA->getAddressSpace() != 0)
884     fail(DL, DAG, "WebAssembly only expects the 0 address space");
885   return DAG.getNode(
886       WebAssemblyISD::Wrapper, DL, VT,
887       DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset()));
888 }
889 
890 SDValue WebAssemblyTargetLowering::LowerExternalSymbol(
891     SDValue Op, SelectionDAG &DAG) const {
892   SDLoc DL(Op);
893   const auto *ES = cast<ExternalSymbolSDNode>(Op);
894   EVT VT = Op.getValueType();
895   assert(ES->getTargetFlags() == 0 &&
896          "Unexpected target flags on generic ExternalSymbolSDNode");
897   // Set the TargetFlags to 0x1 which indicates that this is a "function"
898   // symbol rather than a data symbol. We do this unconditionally even though
899   // we don't know anything about the symbol other than its name, because all
900   // external symbols used in target-independent SelectionDAG code are for
901   // functions.
902   return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
903                      DAG.getTargetExternalSymbol(ES->getSymbol(), VT,
904                                                  WebAssemblyII::MO_SYMBOL_FUNCTION));
905 }
906 
907 SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
908                                                   SelectionDAG &DAG) const {
909   // There's no need for a Wrapper node because we always incorporate a jump
910   // table operand into a BR_TABLE instruction, rather than ever
911   // materializing it in a register.
912   const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
913   return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
914                                 JT->getTargetFlags());
915 }
916 
917 SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
918                                               SelectionDAG &DAG) const {
919   SDLoc DL(Op);
920   SDValue Chain = Op.getOperand(0);
921   const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
922   SDValue Index = Op.getOperand(2);
923   assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
924 
925   SmallVector<SDValue, 8> Ops;
926   Ops.push_back(Chain);
927   Ops.push_back(Index);
928 
929   MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
930   const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
931 
932   // Add an operand for each case.
933   for (auto MBB : MBBs) Ops.push_back(DAG.getBasicBlock(MBB));
934 
935   // TODO: For now, we just pick something arbitrary for a default case for now.
936   // We really want to sniff out the guard and put in the real default case (and
937   // delete the guard).
938   Ops.push_back(DAG.getBasicBlock(MBBs[0]));
939 
940   return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
941 }
942 
943 SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
944                                                 SelectionDAG &DAG) const {
945   SDLoc DL(Op);
946   EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());
947 
948   auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
949   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
950 
951   SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
952                                     MFI->getVarargBufferVreg(), PtrVT);
953   return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
954                       MachinePointerInfo(SV), 0);
955 }
956 
957 SDValue
958 WebAssemblyTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
959                                                    SelectionDAG &DAG) const {
960   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
961   SDLoc DL(Op);
962   switch (IntNo) {
963   default:
964     return {}; // Don't custom lower most intrinsics.
965 
966   case Intrinsic::wasm_lsda:
967     // TODO For now, just return 0 not to crash
968     return DAG.getConstant(0, DL, Op.getValueType());
969   }
970 }
971 
972 //===----------------------------------------------------------------------===//
973 //                          WebAssembly Optimization Hooks
974 //===----------------------------------------------------------------------===//
975