//=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the WebAssemblyTargetLowering class.
///
//===----------------------------------------------------------------------===//

#include "WebAssemblyISelLowering.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyTargetMachine.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/WasmEHFuncInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

#define DEBUG_TYPE "wasm-lower"

// Configure how LLVM IR constructs are legalized/lowered for WebAssembly.
// Everything here is declarative setup consumed by the SelectionDAG
// legalizer; the order matters in places (e.g. register classes must be
// registered before computeRegisterProperties()).
WebAssemblyTargetLowering::WebAssemblyTargetLowering(
    const TargetMachine &TM, const WebAssemblySubtarget &STI)
    : TargetLowering(TM), Subtarget(&STI) {
  // Pointer width depends on the memory model (wasm32 vs. wasm64).
  auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;

  // Booleans always contain 0 or 1.
  setBooleanContents(ZeroOrOneBooleanContent);
  // Except in SIMD vectors, where comparisons produce all-ones lanes.
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  // WebAssembly does not produce floating-point exceptions on normal floating
  // point operations.
  setHasFloatingPointExceptions(false);
  // We don't know the microarchitecture here, so just reduce register pressure.
  setSchedulingPreference(Sched::RegPressure);
  // Tell ISel that we have a stack pointer.
  setStackPointerRegisterToSaveRestore(
      Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
  // Set up the register classes.
  addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
  addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
  addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
  addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
  if (Subtarget->hasSIMD128()) {
    addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
  }
  // 64-bit lanes are gated behind the "unimplemented-simd128" feature.
  if (Subtarget->hasUnimplementedSIMD128()) {
    addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
  }
  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget->getRegisterInfo());

  setOperationAction(ISD::GlobalAddress, MVTPtr, Custom);
  setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom);
  setOperationAction(ISD::JumpTable, MVTPtr, Custom);
  setOperationAction(ISD::BlockAddress, MVTPtr, Custom);
  setOperationAction(ISD::BRIND, MVT::Other, Custom);

  // Take the default expansion for va_arg, va_copy, and va_end. There is no
  // default action for va_start, so we do that custom.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
    // Don't expand the floating-point types to constant pools.
    setOperationAction(ISD::ConstantFP, T, Legal);
    // Expand floating-point comparisons.
    for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
                    ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
      setCondCodeAction(CC, T, Expand);
    // Expand floating-point library function operators.
    for (auto Op :
         {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FMA})
      setOperationAction(Op, T, Expand);
    // Note supported floating-point library function operators that otherwise
    // default to expand.
    for (auto Op :
         {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT})
      setOperationAction(Op, T, Legal);
    // Support minimum and maximum, which otherwise default to expand.
    setOperationAction(ISD::FMINIMUM, T, Legal);
    setOperationAction(ISD::FMAXIMUM, T, Legal);
    // WebAssembly currently has no builtin f16 support.
    setOperationAction(ISD::FP16_TO_FP, T, Expand);
    setOperationAction(ISD::FP_TO_FP16, T, Expand);
    setLoadExtAction(ISD::EXTLOAD, T, MVT::f16, Expand);
    setTruncStoreAction(T, MVT::f16, Expand);
  }

  // Expand unavailable integer operations.
  for (auto Op :
       {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU,
        ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS, ISD::SRA_PARTS,
        ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}) {
    for (auto T : {MVT::i32, MVT::i64})
      setOperationAction(Op, T, Expand);
    if (Subtarget->hasSIMD128())
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
        setOperationAction(Op, T, Expand);
    if (Subtarget->hasUnimplementedSIMD128())
      setOperationAction(Op, MVT::v2i64, Expand);
  }

  // SIMD-specific configuration
  if (Subtarget->hasSIMD128()) {
    // Support saturating add for i8x16 and i16x8
    for (auto Op : {ISD::SADDSAT, ISD::UADDSAT})
      for (auto T : {MVT::v16i8, MVT::v8i16})
        setOperationAction(Op, T, Legal);

    // Custom lower BUILD_VECTORs to minimize number of replace_lanes
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
      setOperationAction(ISD::BUILD_VECTOR, T, Custom);
    if (Subtarget->hasUnimplementedSIMD128())
      for (auto T : {MVT::v2i64, MVT::v2f64})
        setOperationAction(ISD::BUILD_VECTOR, T, Custom);

    // We have custom shuffle lowering to expose the shuffle mask
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
      setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);
    if (Subtarget->hasUnimplementedSIMD128())
      for (auto T : {MVT::v2i64, MVT::v2f64})
        setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);

    // Custom lowering since wasm shifts must have a scalar shift amount
    for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL}) {
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
        setOperationAction(Op, T, Custom);
      if (Subtarget->hasUnimplementedSIMD128())
        setOperationAction(Op, MVT::v2i64, Custom);
    }

    // Custom lower lane accesses to expand out variable indices
    for (auto Op : {ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT}) {
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
        setOperationAction(Op, T, Custom);
      if (Subtarget->hasUnimplementedSIMD128())
        for (auto T : {MVT::v2i64, MVT::v2f64})
          setOperationAction(Op, T, Custom);
    }

    // There is no i64x2.mul instruction
    setOperationAction(ISD::MUL, MVT::v2i64, Expand);

    // There are no vector select instructions
    for (auto Op : {ISD::VSELECT, ISD::SELECT_CC, ISD::SELECT}) {
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
        setOperationAction(Op, T, Expand);
      if (Subtarget->hasUnimplementedSIMD128())
        for (auto T : {MVT::v2i64, MVT::v2f64})
          setOperationAction(Op, T, Expand);
    }

    // Expand integer operations supported for scalars but not SIMD
    for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP, ISD::SDIV, ISD::UDIV,
                    ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR}) {
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
        setOperationAction(Op, T, Expand);
      if (Subtarget->hasUnimplementedSIMD128())
        setOperationAction(Op, MVT::v2i64, Expand);
    }

    // Expand float operations supported for scalars but not SIMD
    for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT,
                    ISD::FCOPYSIGN}) {
      setOperationAction(Op, MVT::v4f32, Expand);
      if (Subtarget->hasUnimplementedSIMD128())
        setOperationAction(Op, MVT::v2f64, Expand);
    }

    // Expand additional SIMD ops that V8 hasn't implemented yet
    if (!Subtarget->hasUnimplementedSIMD128()) {
      setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
      setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
    }
  }

  // As a special case, these operators use the type to mean the type to
  // sign-extend from.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  if (!Subtarget->hasSignExt()) {
    // Sign extends are legal only when extending a vector extract
    auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
    for (auto T : {MVT::i8, MVT::i16, MVT::i32})
      setOperationAction(ISD::SIGN_EXTEND_INREG, T, Action);
  }
  for (auto T : MVT::integer_vector_valuetypes())
    setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand);

  // Dynamic stack allocation: use the default expansion.
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand);

  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
  setOperationAction(ISD::CopyToReg, MVT::Other, Custom);

  // Expand these forms; we pattern-match the forms that we can handle in isel.
  for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
    for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
      setOperationAction(Op, T, Expand);

  // We have custom switch handling.
  setOperationAction(ISD::BR_JT, MVT::Other, Custom);

  // WebAssembly doesn't have:
  // - Floating-point extending loads.
  // - Floating-point truncating stores.
  // - i1 extending loads.
  // - extending/truncating SIMD loads/stores
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  for (auto T : MVT::integer_valuetypes())
    for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
      setLoadExtAction(Ext, T, MVT::i1, Promote);
  if (Subtarget->hasSIMD128()) {
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
                   MVT::v2f64}) {
      for (auto MemT : MVT::vector_valuetypes()) {
        // Mark every (value type, memory type) pair with differing types as
        // unsupported for extending loads and truncating stores.
        if (MVT(T) != MemT) {
          setTruncStoreAction(T, MemT, Expand);
          for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
            setLoadExtAction(Ext, T, MemT, Expand);
        }
      }
    }
  }

  // Don't do anything clever with build_pairs
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // Trap lowers to wasm unreachable
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // Exception handling intrinsics
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

  // Atomics are supported natively up to 64 bits; wider operations are
  // expanded (to library calls) by AtomicExpand.
  setMaxAtomicSizeInBitsSupported(64);

  if (Subtarget->hasBulkMemory()) {
    // Use memory.copy and friends over multiple loads and stores
    MaxStoresPerMemcpy = 1;
    MaxStoresPerMemcpyOptSize = 1;
    MaxStoresPerMemmove = 1;
    MaxStoresPerMemmoveOptSize = 1;
    MaxStoresPerMemset = 1;
    MaxStoresPerMemsetOptSize = 1;
  }

  // Override the __gnu_f2h_ieee/__gnu_h2f_ieee names so that the f32 name is
  // consistent with the f64 and f128 names.
  setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
  setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");

  // Always convert switches to br_tables unless there is only one case, which
  // is equivalent to a simple branch. This reduces code size for wasm, and we
  // defer possible jump table optimizations to the VM.
  setMinimumJumpTableEntries(2);
}

// Decide how an atomicrmw instruction should be lowered: operations with a
// corresponding wasm atomic RMW instruction are kept as-is; everything else
// is expanded into a cmpxchg loop by the AtomicExpand pass.
TargetLowering::AtomicExpansionKind
WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // We have wasm instructions for these
  switch (AI->getOperation()) {
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::And:
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
  case AtomicRMWInst::Xchg:
    return AtomicExpansionKind::None;
  default:
    break;
  }
  return AtomicExpansionKind::CmpXChg;
}

FastISel *WebAssemblyTargetLowering::createFastISel(
    FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
  return WebAssembly::createFastISel(FuncInfo, LibInfo);
}

// Return the integer type to use for the shift-amount operand of a shift on
// a value of type VT: the width of VT rounded up to a legal integer width,
// except that shifts wider than 64 bits become libcalls taking an i32 count.
MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
                                                      EVT VT) const {
  unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
  if (BitWidth > 1 && BitWidth < 8)
    BitWidth = 8;

  if (BitWidth > 64) {
    // The shift will be lowered to a libcall, and compiler-rt libcalls expect
    // the count to be an i32.
    BitWidth = 32;
    assert(BitWidth >= Log2_32_Ceil(VT.getSizeInBits()) &&
           "32-bit shift counts ought to be enough for anyone");
  }

  MVT Result = MVT::getIntegerVT(BitWidth);
  assert(Result != MVT::INVALID_SIMPLE_VALUE_TYPE &&
         "Unable to represent scalar shift amount type");
  return Result;
}

// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
// undefined result on invalid/overflow, to the WebAssembly opcode, which
// traps on invalid/overflow.
330 static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL, 331 MachineBasicBlock *BB, 332 const TargetInstrInfo &TII, 333 bool IsUnsigned, bool Int64, 334 bool Float64, unsigned LoweredOpcode) { 335 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 336 337 unsigned OutReg = MI.getOperand(0).getReg(); 338 unsigned InReg = MI.getOperand(1).getReg(); 339 340 unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32; 341 unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32; 342 unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32; 343 unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32; 344 unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32; 345 unsigned Eqz = WebAssembly::EQZ_I32; 346 unsigned And = WebAssembly::AND_I32; 347 int64_t Limit = Int64 ? INT64_MIN : INT32_MIN; 348 int64_t Substitute = IsUnsigned ? 0 : Limit; 349 double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit; 350 auto &Context = BB->getParent()->getFunction().getContext(); 351 Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context); 352 353 const BasicBlock *LLVMBB = BB->getBasicBlock(); 354 MachineFunction *F = BB->getParent(); 355 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB); 356 MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB); 357 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB); 358 359 MachineFunction::iterator It = ++BB->getIterator(); 360 F->insert(It, FalseMBB); 361 F->insert(It, TrueMBB); 362 F->insert(It, DoneMBB); 363 364 // Transfer the remainder of BB and its successor edges to DoneMBB. 
365 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end()); 366 DoneMBB->transferSuccessorsAndUpdatePHIs(BB); 367 368 BB->addSuccessor(TrueMBB); 369 BB->addSuccessor(FalseMBB); 370 TrueMBB->addSuccessor(DoneMBB); 371 FalseMBB->addSuccessor(DoneMBB); 372 373 unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg; 374 Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg)); 375 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg)); 376 CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); 377 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); 378 FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg)); 379 TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg)); 380 381 MI.eraseFromParent(); 382 // For signed numbers, we can do a single comparison to determine whether 383 // fabs(x) is within range. 384 if (IsUnsigned) { 385 Tmp0 = InReg; 386 } else { 387 BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg); 388 } 389 BuildMI(BB, DL, TII.get(FConst), Tmp1) 390 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal))); 391 BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1); 392 393 // For unsigned numbers, we have to do a separate comparison with zero. 394 if (IsUnsigned) { 395 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg)); 396 unsigned SecondCmpReg = 397 MRI.createVirtualRegister(&WebAssembly::I32RegClass); 398 unsigned AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); 399 BuildMI(BB, DL, TII.get(FConst), Tmp1) 400 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0))); 401 BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1); 402 BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg); 403 CmpReg = AndReg; 404 } 405 406 BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg); 407 408 // Create the CFG diamond to select between doing the conversion or using 409 // the substitute value. 
410 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg); 411 BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg); 412 BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB); 413 BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute); 414 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg) 415 .addReg(FalseReg) 416 .addMBB(FalseMBB) 417 .addReg(TrueReg) 418 .addMBB(TrueMBB); 419 420 return DoneMBB; 421 } 422 423 MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter( 424 MachineInstr &MI, MachineBasicBlock *BB) const { 425 const TargetInstrInfo &TII = *Subtarget->getInstrInfo(); 426 DebugLoc DL = MI.getDebugLoc(); 427 428 switch (MI.getOpcode()) { 429 default: 430 llvm_unreachable("Unexpected instr type to insert"); 431 case WebAssembly::FP_TO_SINT_I32_F32: 432 return LowerFPToInt(MI, DL, BB, TII, false, false, false, 433 WebAssembly::I32_TRUNC_S_F32); 434 case WebAssembly::FP_TO_UINT_I32_F32: 435 return LowerFPToInt(MI, DL, BB, TII, true, false, false, 436 WebAssembly::I32_TRUNC_U_F32); 437 case WebAssembly::FP_TO_SINT_I64_F32: 438 return LowerFPToInt(MI, DL, BB, TII, false, true, false, 439 WebAssembly::I64_TRUNC_S_F32); 440 case WebAssembly::FP_TO_UINT_I64_F32: 441 return LowerFPToInt(MI, DL, BB, TII, true, true, false, 442 WebAssembly::I64_TRUNC_U_F32); 443 case WebAssembly::FP_TO_SINT_I32_F64: 444 return LowerFPToInt(MI, DL, BB, TII, false, false, true, 445 WebAssembly::I32_TRUNC_S_F64); 446 case WebAssembly::FP_TO_UINT_I32_F64: 447 return LowerFPToInt(MI, DL, BB, TII, true, false, true, 448 WebAssembly::I32_TRUNC_U_F64); 449 case WebAssembly::FP_TO_SINT_I64_F64: 450 return LowerFPToInt(MI, DL, BB, TII, false, true, true, 451 WebAssembly::I64_TRUNC_S_F64); 452 case WebAssembly::FP_TO_UINT_I64_F64: 453 return LowerFPToInt(MI, DL, BB, TII, true, true, true, 454 WebAssembly::I64_TRUNC_U_F64); 455 llvm_unreachable("Unexpected instruction to emit with custom 
inserter"); 456 } 457 } 458 459 const char * 460 WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const { 461 switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) { 462 case WebAssemblyISD::FIRST_NUMBER: 463 break; 464 #define HANDLE_NODETYPE(NODE) \ 465 case WebAssemblyISD::NODE: \ 466 return "WebAssemblyISD::" #NODE; 467 #include "WebAssemblyISD.def" 468 #undef HANDLE_NODETYPE 469 } 470 return nullptr; 471 } 472 473 std::pair<unsigned, const TargetRegisterClass *> 474 WebAssemblyTargetLowering::getRegForInlineAsmConstraint( 475 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { 476 // First, see if this is a constraint that directly corresponds to a 477 // WebAssembly register class. 478 if (Constraint.size() == 1) { 479 switch (Constraint[0]) { 480 case 'r': 481 assert(VT != MVT::iPTR && "Pointer MVT not expected here"); 482 if (Subtarget->hasSIMD128() && VT.isVector()) { 483 if (VT.getSizeInBits() == 128) 484 return std::make_pair(0U, &WebAssembly::V128RegClass); 485 } 486 if (VT.isInteger() && !VT.isVector()) { 487 if (VT.getSizeInBits() <= 32) 488 return std::make_pair(0U, &WebAssembly::I32RegClass); 489 if (VT.getSizeInBits() <= 64) 490 return std::make_pair(0U, &WebAssembly::I64RegClass); 491 } 492 break; 493 default: 494 break; 495 } 496 } 497 498 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 499 } 500 501 bool WebAssemblyTargetLowering::isCheapToSpeculateCttz() const { 502 // Assume ctz is a relatively cheap operation. 503 return true; 504 } 505 506 bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const { 507 // Assume clz is a relatively cheap operation. 508 return true; 509 } 510 511 bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL, 512 const AddrMode &AM, 513 Type *Ty, unsigned AS, 514 Instruction *I) const { 515 // WebAssembly offsets are added as unsigned without wrapping. 
The 516 // isLegalAddressingMode gives us no way to determine if wrapping could be 517 // happening, so we approximate this by accepting only non-negative offsets. 518 if (AM.BaseOffs < 0) 519 return false; 520 521 // WebAssembly has no scale register operands. 522 if (AM.Scale != 0) 523 return false; 524 525 // Everything else is legal. 526 return true; 527 } 528 529 bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses( 530 EVT /*VT*/, unsigned /*AddrSpace*/, unsigned /*Align*/, bool *Fast) const { 531 // WebAssembly supports unaligned accesses, though it should be declared 532 // with the p2align attribute on loads and stores which do so, and there 533 // may be a performance impact. We tell LLVM they're "fast" because 534 // for the kinds of things that LLVM uses this for (merging adjacent stores 535 // of constants, etc.), WebAssembly implementations will either want the 536 // unaligned access or they'll split anyway. 537 if (Fast) 538 *Fast = true; 539 return true; 540 } 541 542 bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT, 543 AttributeList Attr) const { 544 // The current thinking is that wasm engines will perform this optimization, 545 // so we can save on code size. 
546 return true; 547 } 548 549 EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL, 550 LLVMContext &C, 551 EVT VT) const { 552 if (VT.isVector()) 553 return VT.changeVectorElementTypeToInteger(); 554 555 return TargetLowering::getSetCCResultType(DL, C, VT); 556 } 557 558 bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 559 const CallInst &I, 560 MachineFunction &MF, 561 unsigned Intrinsic) const { 562 switch (Intrinsic) { 563 case Intrinsic::wasm_atomic_notify: 564 Info.opc = ISD::INTRINSIC_W_CHAIN; 565 Info.memVT = MVT::i32; 566 Info.ptrVal = I.getArgOperand(0); 567 Info.offset = 0; 568 Info.align = 4; 569 // atomic.notify instruction does not really load the memory specified with 570 // this argument, but MachineMemOperand should either be load or store, so 571 // we set this to a load. 572 // FIXME Volatile isn't really correct, but currently all LLVM atomic 573 // instructions are treated as volatiles in the backend, so we should be 574 // consistent. The same applies for wasm_atomic_wait intrinsics too. 575 Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad; 576 return true; 577 case Intrinsic::wasm_atomic_wait_i32: 578 Info.opc = ISD::INTRINSIC_W_CHAIN; 579 Info.memVT = MVT::i32; 580 Info.ptrVal = I.getArgOperand(0); 581 Info.offset = 0; 582 Info.align = 4; 583 Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad; 584 return true; 585 case Intrinsic::wasm_atomic_wait_i64: 586 Info.opc = ISD::INTRINSIC_W_CHAIN; 587 Info.memVT = MVT::i64; 588 Info.ptrVal = I.getArgOperand(0); 589 Info.offset = 0; 590 Info.align = 8; 591 Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad; 592 return true; 593 default: 594 return false; 595 } 596 } 597 598 //===----------------------------------------------------------------------===// 599 // WebAssembly Lowering private implementation. 
600 //===----------------------------------------------------------------------===// 601 602 //===----------------------------------------------------------------------===// 603 // Lowering Code 604 //===----------------------------------------------------------------------===// 605 606 static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) { 607 MachineFunction &MF = DAG.getMachineFunction(); 608 DAG.getContext()->diagnose( 609 DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc())); 610 } 611 612 // Test whether the given calling convention is supported. 613 static bool callingConvSupported(CallingConv::ID CallConv) { 614 // We currently support the language-independent target-independent 615 // conventions. We don't yet have a way to annotate calls with properties like 616 // "cold", and we don't have any call-clobbered registers, so these are mostly 617 // all handled the same. 618 return CallConv == CallingConv::C || CallConv == CallingConv::Fast || 619 CallConv == CallingConv::Cold || 620 CallConv == CallingConv::PreserveMost || 621 CallConv == CallingConv::PreserveAll || 622 CallConv == CallingConv::CXX_FAST_TLS; 623 } 624 625 SDValue 626 WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, 627 SmallVectorImpl<SDValue> &InVals) const { 628 SelectionDAG &DAG = CLI.DAG; 629 SDLoc DL = CLI.DL; 630 SDValue Chain = CLI.Chain; 631 SDValue Callee = CLI.Callee; 632 MachineFunction &MF = DAG.getMachineFunction(); 633 auto Layout = MF.getDataLayout(); 634 635 CallingConv::ID CallConv = CLI.CallConv; 636 if (!callingConvSupported(CallConv)) 637 fail(DL, DAG, 638 "WebAssembly doesn't support language-specific or target-specific " 639 "calling conventions yet"); 640 if (CLI.IsPatchPoint) 641 fail(DL, DAG, "WebAssembly doesn't support patch point yet"); 642 643 // WebAssembly doesn't currently support explicit tail calls. If they are 644 // required, fail. Otherwise, just disable them. 
645 if ((CallConv == CallingConv::Fast && CLI.IsTailCall && 646 MF.getTarget().Options.GuaranteedTailCallOpt) || 647 (CLI.CS && CLI.CS.isMustTailCall())) 648 fail(DL, DAG, "WebAssembly doesn't support tail call yet"); 649 CLI.IsTailCall = false; 650 651 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 652 if (Ins.size() > 1) 653 fail(DL, DAG, "WebAssembly doesn't support more than 1 returned value yet"); 654 655 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 656 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 657 unsigned NumFixedArgs = 0; 658 for (unsigned I = 0; I < Outs.size(); ++I) { 659 const ISD::OutputArg &Out = Outs[I]; 660 SDValue &OutVal = OutVals[I]; 661 if (Out.Flags.isNest()) 662 fail(DL, DAG, "WebAssembly hasn't implemented nest arguments"); 663 if (Out.Flags.isInAlloca()) 664 fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments"); 665 if (Out.Flags.isInConsecutiveRegs()) 666 fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments"); 667 if (Out.Flags.isInConsecutiveRegsLast()) 668 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments"); 669 if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) { 670 auto &MFI = MF.getFrameInfo(); 671 int FI = MFI.CreateStackObject(Out.Flags.getByValSize(), 672 Out.Flags.getByValAlign(), 673 /*isSS=*/false); 674 SDValue SizeNode = 675 DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32); 676 SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout)); 677 Chain = DAG.getMemcpy( 678 Chain, DL, FINode, OutVal, SizeNode, Out.Flags.getByValAlign(), 679 /*isVolatile*/ false, /*AlwaysInline=*/false, 680 /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo()); 681 OutVal = FINode; 682 } 683 // Count the number of fixed args *after* legalization. 684 NumFixedArgs += Out.IsFixed; 685 } 686 687 bool IsVarArg = CLI.IsVarArg; 688 auto PtrVT = getPointerTy(Layout); 689 690 // Analyze operands of the call, assigning locations to each operand. 
691 SmallVector<CCValAssign, 16> ArgLocs; 692 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 693 694 if (IsVarArg) { 695 // Outgoing non-fixed arguments are placed in a buffer. First 696 // compute their offsets and the total amount of buffer space needed. 697 for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) { 698 const ISD::OutputArg &Out = Outs[I]; 699 SDValue &Arg = OutVals[I]; 700 EVT VT = Arg.getValueType(); 701 assert(VT != MVT::iPTR && "Legalized args should be concrete"); 702 Type *Ty = VT.getTypeForEVT(*DAG.getContext()); 703 unsigned Align = std::max(Out.Flags.getOrigAlign(), 704 Layout.getABITypeAlignment(Ty)); 705 unsigned Offset = CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), 706 Align); 707 CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(), 708 Offset, VT.getSimpleVT(), 709 CCValAssign::Full)); 710 } 711 } 712 713 unsigned NumBytes = CCInfo.getAlignedCallFrameSize(); 714 715 SDValue FINode; 716 if (IsVarArg && NumBytes) { 717 // For non-fixed arguments, next emit stores to store the argument values 718 // to the stack buffer at the offsets computed above. 
719 int FI = MF.getFrameInfo().CreateStackObject(NumBytes, 720 Layout.getStackAlignment(), 721 /*isSS=*/false); 722 unsigned ValNo = 0; 723 SmallVector<SDValue, 8> Chains; 724 for (SDValue Arg : 725 make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) { 726 assert(ArgLocs[ValNo].getValNo() == ValNo && 727 "ArgLocs should remain in order and only hold varargs args"); 728 unsigned Offset = ArgLocs[ValNo++].getLocMemOffset(); 729 FINode = DAG.getFrameIndex(FI, getPointerTy(Layout)); 730 SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode, 731 DAG.getConstant(Offset, DL, PtrVT)); 732 Chains.push_back( 733 DAG.getStore(Chain, DL, Arg, Add, 734 MachinePointerInfo::getFixedStack(MF, FI, Offset), 0)); 735 } 736 if (!Chains.empty()) 737 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); 738 } else if (IsVarArg) { 739 FINode = DAG.getIntPtrConstant(0, DL); 740 } 741 742 if (Callee->getOpcode() == ISD::GlobalAddress) { 743 // If the callee is a GlobalAddress node (quite common, every direct call 744 // is) turn it into a TargetGlobalAddress node so that LowerGlobalAddress 745 // doesn't at MO_GOT which is not needed for direct calls. 746 GlobalAddressSDNode* GA = cast<GlobalAddressSDNode>(Callee); 747 Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), DL, 748 getPointerTy(DAG.getDataLayout()), 749 GA->getOffset()); 750 Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL, 751 getPointerTy(DAG.getDataLayout()), Callee); 752 } 753 754 // Compute the operands for the CALLn node. 755 SmallVector<SDValue, 16> Ops; 756 Ops.push_back(Chain); 757 Ops.push_back(Callee); 758 759 // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs 760 // isn't reliable. 761 Ops.append(OutVals.begin(), 762 IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end()); 763 // Add a pointer to the vararg buffer. 
764 if (IsVarArg) 765 Ops.push_back(FINode); 766 767 SmallVector<EVT, 8> InTys; 768 for (const auto &In : Ins) { 769 assert(!In.Flags.isByVal() && "byval is not valid for return values"); 770 assert(!In.Flags.isNest() && "nest is not valid for return values"); 771 if (In.Flags.isInAlloca()) 772 fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values"); 773 if (In.Flags.isInConsecutiveRegs()) 774 fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values"); 775 if (In.Flags.isInConsecutiveRegsLast()) 776 fail(DL, DAG, 777 "WebAssembly hasn't implemented cons regs last return values"); 778 // Ignore In.getOrigAlign() because all our arguments are passed in 779 // registers. 780 InTys.push_back(In.VT); 781 } 782 InTys.push_back(MVT::Other); 783 SDVTList InTyList = DAG.getVTList(InTys); 784 SDValue Res = 785 DAG.getNode(Ins.empty() ? WebAssemblyISD::CALL0 : WebAssemblyISD::CALL1, 786 DL, InTyList, Ops); 787 if (Ins.empty()) { 788 Chain = Res; 789 } else { 790 InVals.push_back(Res); 791 Chain = Res.getValue(1); 792 } 793 794 return Chain; 795 } 796 797 bool WebAssemblyTargetLowering::CanLowerReturn( 798 CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/, 799 const SmallVectorImpl<ISD::OutputArg> &Outs, 800 LLVMContext & /*Context*/) const { 801 // WebAssembly can't currently handle returning tuples. 
  return Outs.size() <= 1;
}

SDValue WebAssemblyTargetLowering::LowerReturn(
    SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
    SelectionDAG &DAG) const {
  assert(Outs.size() <= 1 && "WebAssembly can only return up to one value");
  if (!callingConvSupported(CallConv))
    fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");

  // Build the RETURN node: chain first, then the (at most one) return value.
  SmallVector<SDValue, 4> RetOps(1, Chain);
  RetOps.append(OutVals.begin(), OutVals.end());
  Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);

  // Diagnose unsupported attributes on the return values.
  for (const ISD::OutputArg &Out : Outs) {
    assert(!Out.Flags.isByVal() && "byval is not valid for return values");
    assert(!Out.Flags.isNest() && "nest is not valid for return values");
    assert(Out.IsFixed && "non-fixed return value is not valid");
    if (Out.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
    if (Out.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
    if (Out.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
  }

  return Chain;
}

SDValue WebAssemblyTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  if (!callingConvSupported(CallConv))
    fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");

  MachineFunction &MF = DAG.getMachineFunction();
  auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();

  // Set up the incoming ARGUMENTS value, which serves to represent the liveness
  // of the incoming values before they're represented by virtual registers.
  MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);

  for (const ISD::InputArg &In : Ins) {
    if (In.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
    if (In.Flags.isNest())
      fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
    if (In.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
    if (In.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
    // Ignore In.getOrigAlign() because all our arguments are passed in
    // registers.
    // A used argument becomes an ARGUMENT node carrying its index; an unused
    // one becomes UNDEF.
    InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
                                           DAG.getTargetConstant(InVals.size(),
                                                                 DL, MVT::i32))
                             : DAG.getUNDEF(In.VT));

    // Record the number and types of arguments.
    MFI->addParam(In.VT);
  }

  // Varargs are copied into a buffer allocated by the caller, and a pointer to
  // the buffer is passed as an argument.
  if (IsVarArg) {
    MVT PtrVT = getPointerTy(MF.getDataLayout());
    unsigned VarargVreg =
        MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrVT));
    // Remember the vreg so LowerVASTART can read the buffer address back.
    MFI->setVarargBufferVreg(VarargVreg);
    Chain = DAG.getCopyToReg(
        Chain, DL, VarargVreg,
        DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
                    DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
    MFI->addParam(PtrVT);
  }

  // Record the number and types of arguments and results.
  SmallVector<MVT, 4> Params;
  SmallVector<MVT, 4> Results;
  computeSignatureVTs(MF.getFunction().getFunctionType(), MF.getFunction(),
                      DAG.getTarget(), Params, Results);
  for (MVT VT : Results)
    MFI->addResult(VT);
  // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
  // the param logic here with ComputeSignatureVTs
  assert(MFI->getParams().size() == Params.size() &&
         std::equal(MFI->getParams().begin(), MFI->getParams().end(),
                    Params.begin()));

  return Chain;
}

//===----------------------------------------------------------------------===//
// Custom lowering hooks.
//===----------------------------------------------------------------------===//

/// Dispatch each operation marked Custom in the constructor to its
/// per-operation lowering routine below.
SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("unimplemented operation lowering");
    return SDValue();
  case ISD::FrameIndex:
    return LowerFrameIndex(Op, DAG);
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::ExternalSymbol:
    return LowerExternalSymbol(Op, DAG);
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG);
  case ISD::BR_JT:
    return LowerBR_JT(Op, DAG);
  case ISD::VASTART:
    return LowerVASTART(Op, DAG);
  case ISD::BlockAddress:
  case ISD::BRIND:
    fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
    return SDValue();
  case ISD::RETURNADDR: // Probably nothing meaningful can be returned here.
    fail(DL, DAG, "WebAssembly hasn't implemented __builtin_return_address");
    return SDValue();
  case ISD::FRAMEADDR:
    return LowerFRAMEADDR(Op, DAG);
  case ISD::CopyToReg:
    return LowerCopyToReg(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
  case ISD::INSERT_VECTOR_ELT:
    return LowerAccessVectorElement(Op, DAG);
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_WO_CHAIN:
  case ISD::INTRINSIC_W_CHAIN:
    return LowerIntrinsic(Op, DAG);
  case ISD::SIGN_EXTEND_INREG:
    return LowerSIGN_EXTEND_INREG(Op, DAG);
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    return LowerShift(Op, DAG);
  }
}

SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDValue Src = Op.getOperand(2);
  if (isa<FrameIndexSDNode>(Src.getNode())) {
    // CopyToReg nodes don't support FrameIndex operands. Other targets select
    // the FI to some LEA-like instruction, but since we don't have that, we
    // need to insert some kind of instruction that can take an FI operand and
    // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
    // local.copy between Op and its FI operand.
    SDValue Chain = Op.getOperand(0);
    SDLoc DL(Op);
    unsigned Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
    EVT VT = Src.getValueType();
    SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
                                                   : WebAssembly::COPY_I64,
                                    DL, VT, Src),
                 0);
    // Rebuild the CopyToReg, forwarding operand 3 (presumably glue — the
    // node has two results in that case) when it is present.
    return Op.getNode()->getNumValues() == 1
               ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
               : DAG.getCopyToReg(Chain, DL, Reg, Copy,
                                  Op.getNumOperands() == 4 ? Op.getOperand(3)
                                                           : SDValue());
  }
  return SDValue();
}

SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
                                                   SelectionDAG &DAG) const {
  // Simply convert the generic FrameIndex into a target-specific one.
  int FI = cast<FrameIndexSDNode>(Op)->getIndex();
  return DAG.getTargetFrameIndex(FI, Op.getValueType());
}

SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // Non-zero depths are not supported by WebAssembly currently. Use the
  // legalizer's default expansion, which is to return 0 (what this function is
  // documented to do).
  if (Op.getConstantOperandVal(0) > 0)
    return SDValue();

  DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
  EVT VT = Op.getValueType();
  unsigned FP =
      Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
  return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
}

SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const auto *GA = cast<GlobalAddressSDNode>(Op);
  EVT VT = Op.getValueType();
  assert(GA->getTargetFlags() == 0 &&
         "Unexpected target flags on generic GlobalAddressSDNode");
  if (GA->getAddressSpace() != 0)
    fail(DL, DAG, "WebAssembly only expects the 0 address space");

  unsigned OperandFlags = 0;
  if (isPositionIndependent()) {
    const GlobalValue *GV = GA->getGlobal();
    if (getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) {
      // DSO-local symbols are addressed relative to __table_base (functions)
      // or __memory_base (data): base + PIC-wrapped symbol offset.
      MachineFunction &MF = DAG.getMachineFunction();
      MVT PtrVT = getPointerTy(MF.getDataLayout());
      const char *BaseName;
      if (GV->getValueType()->isFunctionTy()) {
        BaseName = MF.createExternalSymbolName("__table_base");
        OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL;
      } else {
        BaseName = MF.createExternalSymbolName("__memory_base");
        OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL;
      }
      SDValue BaseAddr =
          DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
                      DAG.getTargetExternalSymbol(BaseName, PtrVT));

      SDValue SymAddr = DAG.getNode(
          WebAssemblyISD::WrapperPIC, DL, VT,
          DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
                                     OperandFlags));

      return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
    } else {
      // Non-DSO-local symbols are accessed through the GOT.
      OperandFlags = WebAssemblyII::MO_GOT;
    }
  }

  return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
                     DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
                                                GA->getOffset(), OperandFlags));
}

SDValue
WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const auto *ES = cast<ExternalSymbolSDNode>(Op);
  EVT VT = Op.getValueType();
  assert(ES->getTargetFlags() == 0 &&
         "Unexpected target flags on generic ExternalSymbolSDNode");
  return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
                     DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
}

SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // There's no need for a Wrapper node because we always incorporate a jump
  // table operand into a BR_TABLE instruction, rather than ever
  // materializing it in a register.
  const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
                                JT->getTargetFlags());
}

SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Chain = Op.getOperand(0);
  const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
  SDValue Index = Op.getOperand(2);
  assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Index);

  MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
  const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;

  // Add an operand for each case.
  for (auto MBB : MBBs)
    Ops.push_back(DAG.getBasicBlock(MBB));

  // TODO: For now, we just pick something arbitrary for the default case.
  // We really want to sniff out the guard and put in the real default case
  // (and delete the guard).
  Ops.push_back(DAG.getBasicBlock(MBBs[0]));

  return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
}

SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());

  auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  // va_start: store the address of the vararg buffer (saved in a vreg by
  // LowerFormalArguments) into the va_list object.
  SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
                                    MFI->getVarargBufferVreg(), PtrVT);
  return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
                      MachinePointerInfo(SV), 0);
}

SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
                                                  SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned IntNo;
  // The position of the intrinsic-ID operand depends on whether the node
  // carries a chain (chained nodes have the chain as operand 0).
  switch (Op.getOpcode()) {
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN:
    IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    break;
  case ISD::INTRINSIC_WO_CHAIN:
    IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    break;
  default:
    llvm_unreachable("Invalid intrinsic");
  }
  SDLoc DL(Op);

  switch (IntNo) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.

  case Intrinsic::wasm_lsda: {
    // Produce the address of this function's exception table symbol
    // ("GCC_except_table<N>", where N is the function number).
    EVT VT = Op.getValueType();
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
    auto &Context = MF.getMMI().getContext();
    MCSymbol *S = Context.getOrCreateSymbol(Twine("GCC_except_table") +
                                            Twine(MF.getFunctionNumber()));
    return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
                       DAG.getMCSymbol(S, PtrVT));
  }

  case Intrinsic::wasm_throw: {
    // We only support C++ exceptions for now
    int Tag = cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
    if (Tag != CPP_EXCEPTION)
      llvm_unreachable("Invalid tag!");
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
    const char *SymName = MF.createExternalSymbolName("__cpp_exception");
    SDValue SymNode = DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
                                  DAG.getTargetExternalSymbol(SymName, PtrVT));
    return DAG.getNode(WebAssemblyISD::THROW, DL,
                       MVT::Other, // outchain type
                       {
                           Op.getOperand(0), // inchain
                           SymNode,          // exception symbol
                           Op.getOperand(3)  // thrown value
                       });
  }
  }
}

SDValue
WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // If sign extension operations are disabled, allow sext_inreg only if operand
  // is a vector extract. SIMD does not depend on sign extension operations, but
  // allowing sext_inreg in this context lets us have simple patterns to select
  // extract_lane_s instructions. Expanding sext_inreg everywhere would be
  // simpler in this file, but would necessitate large and brittle patterns to
  // undo the expansion and select extract_lane_s instructions.
  assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
  if (Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT)
    return Op;
  // Otherwise expand
  return SDValue();
}

SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const EVT VecT = Op.getValueType();
  const EVT LaneT = Op.getOperand(0).getValueType();
  const size_t Lanes = Op.getNumOperands();
  auto IsConstant = [](const SDValue &V) {
    return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
  };

  // Find the most common operand, which is approximately the best to splat.
  // Count constant and dynamic (non-constant, non-undef) lanes as we go; the
  // counts feed the encoding-size estimates below.
  using Entry = std::pair<SDValue, size_t>;
  SmallVector<Entry, 16> ValueCounts;
  size_t NumConst = 0, NumDynamic = 0;
  for (const SDValue &Lane : Op->op_values()) {
    if (Lane.isUndef()) {
      continue;
    } else if (IsConstant(Lane)) {
      NumConst++;
    } else {
      NumDynamic++;
    }
    auto CountIt = std::find_if(ValueCounts.begin(), ValueCounts.end(),
                                [&Lane](Entry A) { return A.first == Lane; });
    if (CountIt == ValueCounts.end()) {
      ValueCounts.emplace_back(Lane, 1);
    } else {
      CountIt->second++;
    }
  }
  auto CommonIt =
      std::max_element(ValueCounts.begin(), ValueCounts.end(),
                       [](Entry A, Entry B) { return A.second < B.second; });
  assert(CommonIt != ValueCounts.end() && "Unexpected all-undef build_vector");
  SDValue SplatValue = CommonIt->first;
  size_t NumCommon = CommonIt->second;

  // If v128.const is available, consider using it instead of a splat.
  // Compare the estimated encoded sizes (in bytes) of the two strategies.
  if (Subtarget->hasUnimplementedSIMD128()) {
    // {i32,i64,f32,f64}.const opcode, and value
    const size_t ConstBytes = 1 + std::max(size_t(4), 16 / Lanes);
    // SIMD prefix and opcode
    const size_t SplatBytes = 2;
    const size_t SplatConstBytes = SplatBytes + ConstBytes;
    // SIMD prefix, opcode, and lane index
    const size_t ReplaceBytes = 3;
    const size_t ReplaceConstBytes = ReplaceBytes + ConstBytes;
    // SIMD prefix, v128.const opcode, and 128-bit value
    const size_t VecConstBytes = 18;
    // Initial v128.const and a replace_lane for each non-const operand
    const size_t ConstInitBytes = VecConstBytes + NumDynamic * ReplaceBytes;
    // Initial splat and all necessary replace_lanes
    const size_t SplatInitBytes =
        IsConstant(SplatValue)
            // Initial constant splat
            ? (SplatConstBytes +
               // Constant replace_lanes
               (NumConst - NumCommon) * ReplaceConstBytes +
               // Dynamic replace_lanes
               (NumDynamic * ReplaceBytes))
            // Initial dynamic splat
            : (SplatBytes +
               // Constant replace_lanes
               (NumConst * ReplaceConstBytes) +
               // Dynamic replace_lanes
               (NumDynamic - NumCommon) * ReplaceBytes);
    if (ConstInitBytes < SplatInitBytes) {
      // Create build_vector that will lower to initial v128.const, with
      // non-constant lanes zero-filled as placeholders.
      SmallVector<SDValue, 16> ConstLanes;
      for (const SDValue &Lane : Op->op_values()) {
        if (IsConstant(Lane)) {
          ConstLanes.push_back(Lane);
        } else if (LaneT.isFloatingPoint()) {
          ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
        } else {
          ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
        }
      }
      SDValue Result = DAG.getBuildVector(VecT, DL, ConstLanes);
      // Add replace_lane instructions for non-const lanes
      for (size_t I = 0; I < Lanes; ++I) {
        const SDValue &Lane = Op->getOperand(I);
        if (!Lane.isUndef() && !IsConstant(Lane))
          Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
                               DAG.getConstant(I, DL, MVT::i32));
      }
      return Result;
    }
  }
  // Use a splat for the initial vector
  SDValue Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
  // Add replace_lane instructions for other values
  for (size_t I = 0; I < Lanes; ++I) {
    const SDValue &Lane = Op->getOperand(I);
    if (Lane != SplatValue)
      Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
                           DAG.getConstant(I, DL, MVT::i32));
  }
  return Result;
}

SDValue
WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
  MVT VecType = Op.getOperand(0).getSimpleValueType();
  assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
  size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;

  // Space for two vector args and sixteen mask indices
  SDValue Ops[18];
  size_t OpIdx = 0;
  Ops[OpIdx++] = Op.getOperand(0);
  Ops[OpIdx++] = Op.getOperand(1);

  // Expand mask indices to byte indices and materialize them as operands
  for (int M : Mask) {
    for (size_t J = 0; J < LaneBytes; ++J) {
      // Lower undefs (represented by -1 in mask) to zero
      uint64_t ByteIndex = M == -1 ? 0 : (uint64_t)M * LaneBytes + J;
      Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
    }
  }

  return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
}

SDValue
WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
                                                    SelectionDAG &DAG) const {
  // Allow constant lane indices, expand variable lane indices
  SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
  if (isa<ConstantSDNode>(IdxNode) || IdxNode->isUndef())
    return Op;
  else
    // Perform default expansion
    return SDValue();
}

/// Unroll a vector shift into scalar shifts, masking the shift amount for
/// lanes narrower than 32 bits so the widened scalar shift is lane-correct.
static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
  EVT LaneT = Op.getSimpleValueType().getVectorElementType();
  // 32-bit and 64-bit unrolled shifts will have proper semantics
  if (LaneT.bitsGE(MVT::i32))
    return DAG.UnrollVectorOp(Op.getNode());
  // Otherwise mask the shift value to get proper semantics from 32-bit shift
  SDLoc DL(Op);
  SDValue ShiftVal = Op.getOperand(1);
  uint64_t MaskVal = LaneT.getSizeInBits() - 1;
  SDValue MaskedShiftVal = DAG.getNode(
      ISD::AND,                    // mask opcode
      DL, ShiftVal.getValueType(), // masked value type
      ShiftVal,                    // original shift value operand
      DAG.getConstant(MaskVal, DL, ShiftVal.getValueType()) // mask operand
  );

  return DAG.UnrollVectorOp(
      DAG.getNode(Op.getOpcode(),        // original shift opcode
                  DL, Op.getValueType(), // original return type
                  Op.getOperand(0),      // original vector operand,
                  MaskedShiftVal         // new masked shift value operand
                  )
          .getNode());
}

SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);

  // Only manually lower vector shifts
  assert(Op.getSimpleValueType().isVector());

  // Expand all vector shifts until V8 fixes its implementation
  // TODO: remove this once V8 is fixed
  if (!Subtarget->hasUnimplementedSIMD128())
    return unrollVectorShift(Op, DAG);

  // Unroll non-splat vector shifts
  BuildVectorSDNode *ShiftVec;
  SDValue SplatVal;
  if (!(ShiftVec = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode())) ||
      !(SplatVal = ShiftVec->getSplatValue()))
    return unrollVectorShift(Op, DAG);

  // All splats except i64x2 const splats are handled by patterns
  auto *SplatConst = dyn_cast<ConstantSDNode>(SplatVal);
  if (!SplatConst || Op.getSimpleValueType() != MVT::v2i64)
    return Op;

  // i64x2 const splats are custom lowered to avoid unnecessary wraps
  unsigned Opcode;
  switch (Op.getOpcode()) {
  case ISD::SHL:
    Opcode = WebAssemblyISD::VEC_SHL;
    break;
  case ISD::SRA:
    Opcode = WebAssemblyISD::VEC_SHR_S;
    break;
  case ISD::SRL:
    Opcode = WebAssemblyISD::VEC_SHR_U;
    break;
  default:
    llvm_unreachable("unexpected opcode");
  }
  // Narrow the splatted shift amount to an i32 operand for the target node.
  APInt Shift = SplatConst->getAPIntValue().zextOrTrunc(32);
  return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0),
                     DAG.getConstant(Shift, DL, MVT::i32));
}

//===----------------------------------------------------------------------===//
// WebAssembly Optimization Hooks
//===----------------------------------------------------------------------===//