//=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the WebAssemblyTargetLowering class.
///
//===----------------------------------------------------------------------===//

#include "WebAssemblyISelLowering.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyTargetMachine.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

#define DEBUG_TYPE "wasm-lower"

WebAssemblyTargetLowering::WebAssemblyTargetLowering(
    const TargetMachine &TM, const WebAssemblySubtarget &STI)
    : TargetLowering(TM), Subtarget(&STI) {
  auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;

  // Booleans always contain 0 or 1.
  setBooleanContents(ZeroOrOneBooleanContent);
  // WebAssembly does not produce floating-point exceptions on normal floating
  // point operations.
  setHasFloatingPointExceptions(false);
  // We don't know the microarchitecture here, so just reduce register
  // pressure.
  setSchedulingPreference(Sched::RegPressure);
  // Tell ISel that we have a stack pointer.
  setStackPointerRegisterToSaveRestore(
      Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
  // Set up the register classes.
  addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
  addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
  addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
  addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
  if (Subtarget->hasSIMD128()) {
    addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
  }
  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget->getRegisterInfo());

  setOperationAction(ISD::GlobalAddress, MVTPtr, Custom);
  setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom);
  setOperationAction(ISD::JumpTable, MVTPtr, Custom);
  setOperationAction(ISD::BlockAddress, MVTPtr, Custom);
  setOperationAction(ISD::BRIND, MVT::Other, Custom);
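
  // These Custom operations are handled in LowerOperation below. For example,
  // LowerGlobalAddress wraps the address in a WebAssemblyISD::Wrapper of a
  // TargetGlobalAddress node, which isel can then match to a constant-address
  // instruction (while BlockAddress and BRIND currently just diagnose).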

  // Take the default expansion for va_arg, va_copy, and va_end. There is no
  // default action for va_start, so we do that custom.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  for (auto T : {MVT::f32, MVT::f64}) {
    // Don't expand the floating-point types to constant pools.
    setOperationAction(ISD::ConstantFP, T, Legal);
    // Expand floating-point comparisons.
    for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
                    ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
      setCondCodeAction(CC, T, Expand);
    // Expand floating-point library function operators.
    for (auto Op : {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM,
                    ISD::FMA})
      setOperationAction(Op, T, Expand);
    // Note supported floating-point library function operators that otherwise
    // default to expand.
    for (auto Op :
         {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT})
      setOperationAction(Op, T, Legal);
    // Support minnan and maxnan, which otherwise default to expand.
    setOperationAction(ISD::FMINNAN, T, Legal);
    setOperationAction(ISD::FMAXNAN, T, Legal);
    // WebAssembly currently has no builtin f16 support.
    setOperationAction(ISD::FP16_TO_FP, T, Expand);
    setOperationAction(ISD::FP_TO_FP16, T, Expand);
    setLoadExtAction(ISD::EXTLOAD, T, MVT::f16, Expand);
    setTruncStoreAction(T, MVT::f16, Expand);
  }

  for (auto T : {MVT::i32, MVT::i64}) {
    // Expand unavailable integer operations.
    for (auto Op :
         {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU,
          ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS, ISD::SRA_PARTS,
          ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}) {
      setOperationAction(Op, T, Expand);
    }
  }

  // As a special case, these operators use the type to mean the type to
  // sign-extend from.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  if (!Subtarget->hasSignExt()) {
    for (auto T : {MVT::i8, MVT::i16, MVT::i32})
      setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand);
  }

  // Dynamic stack allocation: use the default expansion.
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand);

  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
  setOperationAction(ISD::CopyToReg, MVT::Other, Custom);

  // Expand these forms; we pattern-match the forms that we can handle in isel.
  for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
    for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
      setOperationAction(Op, T, Expand);

  // We have custom switch handling.
  setOperationAction(ISD::BR_JT, MVT::Other, Custom);
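
  // As an illustration of the custom handling: LowerBR_JT below folds the
  // entire jump table into a single WebAssemblyISD::BR_TABLE node listing
  // every case block plus a default, mirroring wasm's br_table instruction.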

  // WebAssembly doesn't have:
  //  - Floating-point extending loads.
  //  - Floating-point truncating stores.
  //  - i1 extending loads.
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  for (auto T : MVT::integer_valuetypes())
    for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
      setLoadExtAction(Ext, T, MVT::i1, Promote);

  // Trap lowers to wasm unreachable.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // Exception handling intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setMaxAtomicSizeInBitsSupported(64);
}

TargetLowering::AtomicExpansionKind
WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // We have wasm instructions for these.
  switch (AI->getOperation()) {
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::And:
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
  case AtomicRMWInst::Xchg:
    return AtomicExpansionKind::None;
  default:
    break;
  }
  return AtomicExpansionKind::CmpXChg;
}

FastISel *WebAssemblyTargetLowering::createFastISel(
    FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
  return WebAssembly::createFastISel(FuncInfo, LibInfo);
}

bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode * /*GA*/) const {
  // All offsets can be folded.
  return true;
}

MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
                                                      EVT VT) const {
  unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
  if (BitWidth > 1 && BitWidth < 8)
    BitWidth = 8;

  if (BitWidth > 64) {
    // The shift will be lowered to a libcall, and compiler-rt libcalls expect
    // the count to be an i32.
    BitWidth = 32;
    assert(BitWidth >= Log2_32_Ceil(VT.getSizeInBits()) &&
           "32-bit shift counts ought to be enough for anyone");
  }

  MVT Result = MVT::getIntegerVT(BitWidth);
  assert(Result != MVT::INVALID_SIMPLE_VALUE_TYPE &&
         "Unable to represent scalar shift amount type");
  return Result;
}

// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
// undefined result on invalid/overflow, to the WebAssembly opcode, which
// traps on invalid/overflow.
static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
                                       MachineBasicBlock *BB,
                                       const TargetInstrInfo &TII,
                                       bool IsUnsigned, bool Int64,
                                       bool Float64, unsigned LoweredOpcode) {
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();

  unsigned OutReg = MI.getOperand(0).getReg();
  unsigned InReg = MI.getOperand(1).getReg();

  unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
  unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
  unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
  unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
  unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
  unsigned Eqz = WebAssembly::EQZ_I32;
  unsigned And = WebAssembly::AND_I32;
  int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
  int64_t Substitute = IsUnsigned ? 0 : Limit;
  double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
  auto &Context = BB->getParent()->getFunction().getContext();
  Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);
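  // For example, for an unsigned i32 result, Limit is INT32_MIN, so CmpVal is
  // 2^32 and Substitute is 0: inputs in [0.0, 2^32) are converted, and
  // everything else (including NaN) yields 0. For a signed i32 result, CmpVal
  // is 2^31, and out-of-range inputs yield INT32_MIN, i.e. Limit itself.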

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator It = ++BB->getIterator();
  F->insert(It, FalseMBB);
  F->insert(It, TrueMBB);
  F->insert(It, DoneMBB);

  // Transfer the remainder of BB and its successor edges to DoneMBB.
  DoneMBB->splice(DoneMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);

  BB->addSuccessor(TrueMBB);
  BB->addSuccessor(FalseMBB);
  TrueMBB->addSuccessor(DoneMBB);
  FalseMBB->addSuccessor(DoneMBB);

  unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
  Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
  Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
  CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
  TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));

  MI.eraseFromParent();
  // For signed numbers, we can do a single comparison to determine whether
  // fabs(x) is within range.
  if (IsUnsigned) {
    Tmp0 = InReg;
  } else {
    BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
  }
  BuildMI(BB, DL, TII.get(FConst), Tmp1)
      .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
  BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);

  // For unsigned numbers, we have to do a separate comparison with zero.
  if (IsUnsigned) {
    Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
    unsigned SecondCmpReg =
        MRI.createVirtualRegister(&WebAssembly::I32RegClass);
    unsigned AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
    BuildMI(BB, DL, TII.get(FConst), Tmp1)
        .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
    BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
    BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
    CmpReg = AndReg;
  }

  BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);

  // Create the CFG diamond to select between doing the conversion or using
  // the substitute value.
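  //
  //   BB:       CmpReg = (x is in range) ; EqzReg = !CmpReg
  //             br_if EqzReg, TrueMBB
  //   FalseMBB: FalseReg = trunc(x)      ; in range: do the conversion
  //             br DoneMBB
  //   TrueMBB:  TrueReg = Substitute     ; out of range: use the substitute
  //   DoneMBB:  OutReg = phi(FalseReg, TrueReg)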
  BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
  BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
  BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
  BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
  BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
      .addReg(FalseReg)
      .addMBB(FalseMBB)
      .addReg(TrueReg)
      .addMBB(TrueMBB);

  return DoneMBB;
}

MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
    MachineInstr &MI, MachineBasicBlock *BB) const {
  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();

  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected instr type to insert");
  case WebAssembly::FP_TO_SINT_I32_F32:
    return LowerFPToInt(MI, DL, BB, TII, false, false, false,
                        WebAssembly::I32_TRUNC_S_F32);
  case WebAssembly::FP_TO_UINT_I32_F32:
    return LowerFPToInt(MI, DL, BB, TII, true, false, false,
                        WebAssembly::I32_TRUNC_U_F32);
  case WebAssembly::FP_TO_SINT_I64_F32:
    return LowerFPToInt(MI, DL, BB, TII, false, true, false,
                        WebAssembly::I64_TRUNC_S_F32);
  case WebAssembly::FP_TO_UINT_I64_F32:
    return LowerFPToInt(MI, DL, BB, TII, true, true, false,
                        WebAssembly::I64_TRUNC_U_F32);
  case WebAssembly::FP_TO_SINT_I32_F64:
    return LowerFPToInt(MI, DL, BB, TII, false, false, true,
                        WebAssembly::I32_TRUNC_S_F64);
  case WebAssembly::FP_TO_UINT_I32_F64:
    return LowerFPToInt(MI, DL, BB, TII, true, false, true,
                        WebAssembly::I32_TRUNC_U_F64);
  case WebAssembly::FP_TO_SINT_I64_F64:
    return LowerFPToInt(MI, DL, BB, TII, false, true, true,
                        WebAssembly::I64_TRUNC_S_F64);
  case WebAssembly::FP_TO_UINT_I64_F64:
    return LowerFPToInt(MI, DL, BB, TII, true, true, true,
                        WebAssembly::I64_TRUNC_U_F64);
  }
}

const char *
WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) {
  case WebAssemblyISD::FIRST_NUMBER:
    break;
#define HANDLE_NODETYPE(NODE)                                                  \
  case WebAssemblyISD::NODE:                                                   \
    return "WebAssemblyISD::" #NODE;
#include "WebAssemblyISD.def"
#undef HANDLE_NODETYPE
  }
  return nullptr;
}

std::pair<unsigned, const TargetRegisterClass *>
WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // WebAssembly register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      assert(VT != MVT::iPTR && "Pointer MVT not expected here");
      if (Subtarget->hasSIMD128() && VT.isVector()) {
        if (VT.getSizeInBits() == 128)
          return std::make_pair(0U, &WebAssembly::V128RegClass);
      }
      if (VT.isInteger() && !VT.isVector()) {
        if (VT.getSizeInBits() <= 32)
          return std::make_pair(0U, &WebAssembly::I32RegClass);
        if (VT.getSizeInBits() <= 64)
          return std::make_pair(0U, &WebAssembly::I64RegClass);
      }
      break;
    default:
      break;
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
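
// For instance, with a (hypothetical) inline asm statement such as
//   int x; asm("" : "=r"(x));
// the i32-typed 'r' output above resolves to I32RegClass.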

bool WebAssemblyTargetLowering::isCheapToSpeculateCttz() const {
  // Assume ctz is a relatively cheap operation.
  return true;
}

bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const {
  // Assume clz is a relatively cheap operation.
  return true;
}

bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                      const AddrMode &AM,
                                                      Type *Ty, unsigned AS,
                                                      Instruction *I) const {
  // WebAssembly offsets are added as unsigned without wrapping. The
  // isLegalAddressingMode interface gives us no way to determine if wrapping
  // could be happening, so we approximate this by accepting only non-negative
  // offsets.
  if (AM.BaseOffs < 0)
    return false;

  // WebAssembly has no scale register operands.
  if (AM.Scale != 0)
    return false;

  // Everything else is legal.
  return true;
}

bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
    EVT /*VT*/, unsigned /*AddrSpace*/, unsigned /*Align*/, bool *Fast) const {
  // WebAssembly supports unaligned accesses, though such accesses should be
  // declared with the p2align attribute on the loads and stores which perform
  // them, and there may be a performance impact. We tell LLVM they're "fast"
  // because for the kinds of things that LLVM uses this for (merging adjacent
  // stores of constants, etc.), WebAssembly implementations will either want
  // the unaligned access or they'll split anyway.
  if (Fast)
    *Fast = true;
  return true;
}

bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
                                              AttributeList Attr) const {
  // The current thinking is that wasm engines will perform this optimization,
  // so we can save on code size.
  return true;
}

EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
                                                  LLVMContext &C,
                                                  EVT VT) const {
  if (VT.isVector())
    return VT.changeVectorElementTypeToInteger();

  return TargetLowering::getSetCCResultType(DL, C, VT);
}

bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                                   const CallInst &I,
                                                   MachineFunction &MF,
                                                   unsigned Intrinsic) const {
  switch (Intrinsic) {
  case Intrinsic::wasm_atomic_notify:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = 4;
    // atomic.notify instruction does not really load the memory specified with
    // this argument, but MachineMemOperand should either be load or store, so
    // we set this to a load.
    // FIXME Volatile isn't really correct, but currently all LLVM atomic
    // instructions are treated as volatiles in the backend, so we should be
    // consistent. The same applies for wasm_atomic_wait intrinsics too.
    Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
    return true;
  case Intrinsic::wasm_atomic_wait_i32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = 4;
    Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
    return true;
  case Intrinsic::wasm_atomic_wait_i64:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i64;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = 8;
    Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
    return true;
  default:
    return false;
  }
}

//===----------------------------------------------------------------------===//
// WebAssembly Lowering private implementation.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//

static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *msg) {
  MachineFunction &MF = DAG.getMachineFunction();
  DAG.getContext()->diagnose(
      DiagnosticInfoUnsupported(MF.getFunction(), msg, DL.getDebugLoc()));
}

// Test whether the given calling convention is supported.
static bool CallingConvSupported(CallingConv::ID CallConv) {
  // We currently support the language-independent target-independent
  // conventions. We don't yet have a way to annotate calls with properties
  // like "cold", and we don't have any call-clobbered registers, so these are
  // mostly all handled the same.
  return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
         CallConv == CallingConv::Cold ||
         CallConv == CallingConv::PreserveMost ||
         CallConv == CallingConv::PreserveAll ||
         CallConv == CallingConv::CXX_FAST_TLS;
}

SDValue
WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                     SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc DL = CLI.DL;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  MachineFunction &MF = DAG.getMachineFunction();
  auto Layout = MF.getDataLayout();

  CallingConv::ID CallConv = CLI.CallConv;
  if (!CallingConvSupported(CallConv))
    fail(DL, DAG,
         "WebAssembly doesn't support language-specific or target-specific "
         "calling conventions yet");
  if (CLI.IsPatchPoint)
    fail(DL, DAG, "WebAssembly doesn't support patch point yet");

  // WebAssembly doesn't currently support explicit tail calls. If they are
  // required, fail. Otherwise, just disable them.
  if ((CallConv == CallingConv::Fast && CLI.IsTailCall &&
       MF.getTarget().Options.GuaranteedTailCallOpt) ||
      (CLI.CS && CLI.CS.isMustTailCall()))
    fail(DL, DAG, "WebAssembly doesn't support tail call yet");
  CLI.IsTailCall = false;

  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  if (Ins.size() > 1)
    fail(DL, DAG,
         "WebAssembly doesn't support more than 1 returned value yet");

  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  unsigned NumFixedArgs = 0;
  for (unsigned i = 0; i < Outs.size(); ++i) {
    const ISD::OutputArg &Out = Outs[i];
    SDValue &OutVal = OutVals[i];
    if (Out.Flags.isNest())
      fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
    if (Out.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
    if (Out.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
    if (Out.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
    if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
      auto &MFI = MF.getFrameInfo();
      int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
                                     Out.Flags.getByValAlign(),
                                     /*isSS=*/false);
      SDValue SizeNode =
          DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
      SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
      Chain = DAG.getMemcpy(
          Chain, DL, FINode, OutVal, SizeNode, Out.Flags.getByValAlign(),
          /*isVolatile*/ false, /*AlwaysInline=*/false,
          /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
      OutVal = FINode;
    }
    // Count the number of fixed args *after* legalization.
    NumFixedArgs += Out.IsFixed;
  }

  bool IsVarArg = CLI.IsVarArg;
  auto PtrVT = getPointerTy(Layout);

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (IsVarArg) {
    // Outgoing non-fixed arguments are placed in a buffer. First
    // compute their offsets and the total amount of buffer space needed.
    for (SDValue Arg :
         make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) {
      EVT VT = Arg.getValueType();
      assert(VT != MVT::iPTR && "Legalized args should be concrete");
      Type *Ty = VT.getTypeForEVT(*DAG.getContext());
      unsigned Offset = CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty),
                                             Layout.getABITypeAlignment(Ty));
      CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
                                        Offset, VT.getSimpleVT(),
                                        CCValAssign::Full));
    }
  }

  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
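
  // As an illustration: for a varargs call passing an extra i32 and f64, the
  // two non-fixed arguments are assigned buffer offsets 0 and 8 by
  // AllocateStack above; the stores below fill the buffer, and a pointer to
  // it becomes the call's final operand.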
  SDValue FINode;
  if (IsVarArg && NumBytes) {
    // For non-fixed arguments, next emit stores to store the argument values
    // to the stack buffer at the offsets computed above.
    int FI = MF.getFrameInfo().CreateStackObject(NumBytes,
                                                 Layout.getStackAlignment(),
                                                 /*isSS=*/false);
    unsigned ValNo = 0;
    SmallVector<SDValue, 8> Chains;
    for (SDValue Arg :
         make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) {
      assert(ArgLocs[ValNo].getValNo() == ValNo &&
             "ArgLocs should remain in order and only hold varargs args");
      unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
      FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
      SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
                                DAG.getConstant(Offset, DL, PtrVT));
      Chains.push_back(DAG.getStore(
          Chain, DL, Arg, Add,
          MachinePointerInfo::getFixedStack(MF, FI, Offset), 0));
    }
    if (!Chains.empty())
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  } else if (IsVarArg) {
    FINode = DAG.getIntPtrConstant(0, DL);
  }

  // Compute the operands for the CALLn node.
  SmallVector<SDValue, 16> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
  // isn't reliable.
  Ops.append(OutVals.begin(),
             IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
  // Add a pointer to the vararg buffer.
  if (IsVarArg)
    Ops.push_back(FINode);

  SmallVector<EVT, 8> InTys;
  for (const auto &In : Ins) {
    assert(!In.Flags.isByVal() && "byval is not valid for return values");
    assert(!In.Flags.isNest() && "nest is not valid for return values");
    if (In.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
    if (In.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
    if (In.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG,
           "WebAssembly hasn't implemented cons regs last return values");
    // Ignore In.getOrigAlign() because all our arguments are passed in
    // registers.
    InTys.push_back(In.VT);
  }
  InTys.push_back(MVT::Other);
  SDVTList InTyList = DAG.getVTList(InTys);
  SDValue Res =
      DAG.getNode(Ins.empty() ? WebAssemblyISD::CALL0 : WebAssemblyISD::CALL1,
                  DL, InTyList, Ops);
  if (Ins.empty()) {
    Chain = Res;
  } else {
    InVals.push_back(Res);
    Chain = Res.getValue(1);
  }

  return Chain;
}

bool WebAssemblyTargetLowering::CanLowerReturn(
    CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    LLVMContext & /*Context*/) const {
  // WebAssembly can't currently handle returning tuples.
  return Outs.size() <= 1;
}

SDValue WebAssemblyTargetLowering::LowerReturn(
    SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
    SelectionDAG &DAG) const {
  assert(Outs.size() <= 1 && "WebAssembly can only return up to one value");
  if (!CallingConvSupported(CallConv))
    fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");

  SmallVector<SDValue, 4> RetOps(1, Chain);
  RetOps.append(OutVals.begin(), OutVals.end());
  Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);
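  // The RETURN node's operands are thus the chain followed by the returned
  // value, if any; a void return produces just {Chain}.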

  // Record the number and types of the return values.
  for (const ISD::OutputArg &Out : Outs) {
    assert(!Out.Flags.isByVal() && "byval is not valid for return values");
    assert(!Out.Flags.isNest() && "nest is not valid for return values");
    assert(Out.IsFixed && "non-fixed return value is not valid");
    if (Out.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
    if (Out.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
    if (Out.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
  }

  return Chain;
}

SDValue WebAssemblyTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  if (!CallingConvSupported(CallConv))
    fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");

  MachineFunction &MF = DAG.getMachineFunction();
  auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();

  // Set up the incoming ARGUMENTS value, which serves to represent the
  // liveness of the incoming values before they're represented by virtual
  // registers.
  MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);

  for (const ISD::InputArg &In : Ins) {
    if (In.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
    if (In.Flags.isNest())
      fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
    if (In.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
    if (In.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
    // Ignore In.getOrigAlign() because all our arguments are passed in
    // registers.
    InVals.push_back(
        In.Used
            ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
                          DAG.getTargetConstant(InVals.size(), DL, MVT::i32))
            : DAG.getUNDEF(In.VT));

    // Record the number and types of arguments.
    MFI->addParam(In.VT);
  }

  // Varargs are copied into a buffer allocated by the caller, and a pointer to
  // the buffer is passed as an argument.
  if (IsVarArg) {
    MVT PtrVT = getPointerTy(MF.getDataLayout());
    unsigned VarargVreg =
        MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrVT));
    MFI->setVarargBufferVreg(VarargVreg);
    Chain = DAG.getCopyToReg(
        Chain, DL, VarargVreg,
        DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
                    DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
    MFI->addParam(PtrVT);
  }

  // Record the number and types of results.
  SmallVector<MVT, 4> Params;
  SmallVector<MVT, 4> Results;
  ComputeSignatureVTs(MF.getFunction(), DAG.getTarget(), Params, Results);
  for (MVT VT : Results)
    MFI->addResult(VT);

  return Chain;
}
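
// For example, a function taking (i32, f64) records params {i32, f64}, and
// each used argument becomes an ARGUMENT node carrying its index (0 or 1).
// Unused arguments become UNDEF and need no ARGUMENT node at all.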

//===----------------------------------------------------------------------===//
// Custom lowering hooks.
//===----------------------------------------------------------------------===//

SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("unimplemented operation lowering");
    return SDValue();
  case ISD::FrameIndex:
    return LowerFrameIndex(Op, DAG);
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::ExternalSymbol:
    return LowerExternalSymbol(Op, DAG);
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG);
  case ISD::BR_JT:
    return LowerBR_JT(Op, DAG);
  case ISD::VASTART:
    return LowerVASTART(Op, DAG);
  case ISD::BlockAddress:
  case ISD::BRIND:
    fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
    return SDValue();
  case ISD::RETURNADDR: // Probably nothing meaningful can be returned here.
    fail(DL, DAG, "WebAssembly hasn't implemented __builtin_return_address");
    return SDValue();
  case ISD::FRAMEADDR:
    return LowerFRAMEADDR(Op, DAG);
  case ISD::CopyToReg:
    return LowerCopyToReg(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN:
    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  }
}

SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDValue Src = Op.getOperand(2);
  if (isa<FrameIndexSDNode>(Src.getNode())) {
    // CopyToReg nodes don't support FrameIndex operands. Other targets select
    // the FI to some LEA-like instruction, but since we don't have that, we
    // need to insert some kind of instruction that can take an FI operand and
    // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
    // copy_local between Op and its FI operand.
    SDValue Chain = Op.getOperand(0);
    SDLoc DL(Op);
    unsigned Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
    EVT VT = Src.getValueType();
    SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
                                                   : WebAssembly::COPY_I64,
                                    DL, VT, Src),
                 0);
    return Op.getNode()->getNumValues() == 1
               ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
               : DAG.getCopyToReg(Chain, DL, Reg, Copy,
                                  Op.getNumOperands() == 4 ? Op.getOperand(3)
                                                           : SDValue());
  }
  return SDValue();
}

SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
                                                   SelectionDAG &DAG) const {
  int FI = cast<FrameIndexSDNode>(Op)->getIndex();
  return DAG.getTargetFrameIndex(FI, Op.getValueType());
}
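
// Converting to a TargetFrameIndex keeps the frame index as an explicit
// operand, to be rewritten into stack-pointer arithmetic when frame indices
// are eliminated later in the backend.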

SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // Non-zero depths are not supported by WebAssembly currently. Use the
  // legalizer's default expansion, which is to return 0 (what this function is
  // documented to do).
  if (Op.getConstantOperandVal(0) > 0)
    return SDValue();

  DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
  EVT VT = Op.getValueType();
  unsigned FP =
      Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
  return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
}

SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const auto *GA = cast<GlobalAddressSDNode>(Op);
  EVT VT = Op.getValueType();
  assert(GA->getTargetFlags() == 0 &&
         "Unexpected target flags on generic GlobalAddressSDNode");
  if (GA->getAddressSpace() != 0)
    fail(DL, DAG, "WebAssembly only expects the 0 address space");
  return DAG.getNode(
      WebAssemblyISD::Wrapper, DL, VT,
      DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset()));
}

SDValue WebAssemblyTargetLowering::LowerExternalSymbol(
    SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const auto *ES = cast<ExternalSymbolSDNode>(Op);
  EVT VT = Op.getValueType();
  assert(ES->getTargetFlags() == 0 &&
         "Unexpected target flags on generic ExternalSymbolSDNode");
  // Set the TargetFlags to 0x1 which indicates that this is a "function"
  // symbol rather than a data symbol. We do this unconditionally even though
  // we don't know anything about the symbol other than its name, because all
  // external symbols used in target-independent SelectionDAG code are for
  // functions.
  return DAG.getNode(
      WebAssemblyISD::Wrapper, DL, VT,
      DAG.getTargetExternalSymbol(ES->getSymbol(), VT,
                                  WebAssemblyII::MO_SYMBOL_FUNCTION));
}

SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // There's no need for a Wrapper node because we always incorporate a jump
  // table operand into a BR_TABLE instruction, rather than ever
  // materializing it in a register.
  const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
                                JT->getTargetFlags());
}

SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Chain = Op.getOperand(0);
  const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
  SDValue Index = Op.getOperand(2);
  assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Index);

  MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
  const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;

  // Add an operand for each case.
  for (auto MBB : MBBs)
    Ops.push_back(DAG.getBasicBlock(MBB));

  // TODO: For now, we just pick something arbitrary for a default case. We
  // really want to sniff out the guard and put in the real default case (and
  // delete the guard).
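  // The resulting BR_TABLE operands are {Chain, Index, Case0, Case1, ...,
  // Default}; here the first case block doubles as the default.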
  Ops.push_back(DAG.getBasicBlock(MBBs[0]));

  return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
}

SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());

  auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
                                    MFI->getVarargBufferVreg(), PtrVT);
  return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
                      MachinePointerInfo(SV), 0);
}

SDValue
WebAssemblyTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                   SelectionDAG &DAG) const {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  SDLoc DL(Op);
  switch (IntNo) {
  default:
    return {}; // Don't custom lower most intrinsics.

  case Intrinsic::wasm_lsda:
    // TODO: For now, just return 0 to avoid crashing.
    return DAG.getConstant(0, DL, Op.getValueType());
  }
}

//===----------------------------------------------------------------------===//
// WebAssembly Optimization Hooks
//===----------------------------------------------------------------------===//