1 //=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements the WebAssemblyTargetLowering class. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "WebAssemblyISelLowering.h" 15 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" 16 #include "WebAssemblyMachineFunctionInfo.h" 17 #include "WebAssemblySubtarget.h" 18 #include "WebAssemblyTargetMachine.h" 19 #include "WebAssemblyUtilities.h" 20 #include "llvm/CodeGen/Analysis.h" 21 #include "llvm/CodeGen/CallingConvLower.h" 22 #include "llvm/CodeGen/MachineInstrBuilder.h" 23 #include "llvm/CodeGen/MachineJumpTableInfo.h" 24 #include "llvm/CodeGen/MachineModuleInfo.h" 25 #include "llvm/CodeGen/MachineRegisterInfo.h" 26 #include "llvm/CodeGen/SelectionDAG.h" 27 #include "llvm/CodeGen/WasmEHFuncInfo.h" 28 #include "llvm/IR/DiagnosticInfo.h" 29 #include "llvm/IR/DiagnosticPrinter.h" 30 #include "llvm/IR/Function.h" 31 #include "llvm/IR/Intrinsics.h" 32 #include "llvm/IR/IntrinsicsWebAssembly.h" 33 #include "llvm/Support/Debug.h" 34 #include "llvm/Support/ErrorHandling.h" 35 #include "llvm/Support/MathExtras.h" 36 #include "llvm/Support/raw_ostream.h" 37 #include "llvm/Target/TargetOptions.h" 38 using namespace llvm; 39 40 #define DEBUG_TYPE "wasm-lower" 41 42 WebAssemblyTargetLowering::WebAssemblyTargetLowering( 43 const TargetMachine &TM, const WebAssemblySubtarget &STI) 44 : TargetLowering(TM), Subtarget(&STI) { 45 auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32; 46 47 // Booleans always contain 0 or 1. 48 setBooleanContents(ZeroOrOneBooleanContent); 49 // Except in SIMD vectors 50 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); 51 // We don't know the microarchitecture here, so just reduce register pressure. 52 setSchedulingPreference(Sched::RegPressure); 53 // Tell ISel that we have a stack pointer. 54 setStackPointerRegisterToSaveRestore( 55 Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32); 56 // Set up the register classes. 57 addRegisterClass(MVT::i32, &WebAssembly::I32RegClass); 58 addRegisterClass(MVT::i64, &WebAssembly::I64RegClass); 59 addRegisterClass(MVT::f32, &WebAssembly::F32RegClass); 60 addRegisterClass(MVT::f64, &WebAssembly::F64RegClass); 61 if (Subtarget->hasSIMD128()) { 62 addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass); 63 addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass); 64 addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass); 65 addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass); 66 addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass); 67 addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass); 68 } 69 // Compute derived properties from the register classes. 70 computeRegisterProperties(Subtarget->getRegisterInfo()); 71 72 setOperationAction(ISD::GlobalAddress, MVTPtr, Custom); 73 setOperationAction(ISD::GlobalTLSAddress, MVTPtr, Custom); 74 setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom); 75 setOperationAction(ISD::JumpTable, MVTPtr, Custom); 76 setOperationAction(ISD::BlockAddress, MVTPtr, Custom); 77 setOperationAction(ISD::BRIND, MVT::Other, Custom); 78 79 // Take the default expansion for va_arg, va_copy, and va_end. There is no 80 // default action for va_start, so we do that custom. 81 setOperationAction(ISD::VASTART, MVT::Other, Custom); 82 setOperationAction(ISD::VAARG, MVT::Other, Expand); 83 setOperationAction(ISD::VACOPY, MVT::Other, Expand); 84 setOperationAction(ISD::VAEND, MVT::Other, Expand); 85 86 for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) { 87 // Don't expand the floating-point types to constant pools. 88 setOperationAction(ISD::ConstantFP, T, Legal); 89 // Expand floating-point comparisons. 90 for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE, 91 ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE}) 92 setCondCodeAction(CC, T, Expand); 93 // Expand floating-point library function operators. 94 for (auto Op : 95 {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FMA}) 96 setOperationAction(Op, T, Expand); 97 // Note supported floating-point library function operators that otherwise 98 // default to expand. 99 for (auto Op : 100 {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT}) 101 setOperationAction(Op, T, Legal); 102 // Support minimum and maximum, which otherwise default to expand. 103 setOperationAction(ISD::FMINIMUM, T, Legal); 104 setOperationAction(ISD::FMAXIMUM, T, Legal); 105 // WebAssembly currently has no builtin f16 support. 106 setOperationAction(ISD::FP16_TO_FP, T, Expand); 107 setOperationAction(ISD::FP_TO_FP16, T, Expand); 108 setLoadExtAction(ISD::EXTLOAD, T, MVT::f16, Expand); 109 setTruncStoreAction(T, MVT::f16, Expand); 110 } 111 112 // Expand unavailable integer operations. 113 for (auto Op : 114 {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU, 115 ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS, ISD::SRA_PARTS, 116 ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}) { 117 for (auto T : {MVT::i32, MVT::i64}) 118 setOperationAction(Op, T, Expand); 119 if (Subtarget->hasSIMD128()) 120 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) 121 setOperationAction(Op, T, Expand); 122 } 123 124 if (Subtarget->hasNontrappingFPToInt()) 125 for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}) 126 for (auto T : {MVT::i32, MVT::i64}) 127 setOperationAction(Op, T, Custom); 128 129 // SIMD-specific configuration 130 if (Subtarget->hasSIMD128()) { 131 // Hoist bitcasts out of shuffles 132 setTargetDAGCombine(ISD::VECTOR_SHUFFLE); 133 134 // Combine extends of extract_subvectors into widening ops 135 setTargetDAGCombine(ISD::SIGN_EXTEND); 136 setTargetDAGCombine(ISD::ZERO_EXTEND); 137 138 // Combine {s,u}int_to_fp of extract_vectors into conversion ops 139 setTargetDAGCombine(ISD::SINT_TO_FP); 140 setTargetDAGCombine(ISD::UINT_TO_FP); 141 142 // Combine concat of {s,u}int_to_fp_sat to i32x4.trunc_sat_f64x2_zero_{s,u} 143 setTargetDAGCombine(ISD::CONCAT_VECTORS); 144 145 // Support saturating add for i8x16 and i16x8 146 for (auto Op : {ISD::SADDSAT, ISD::UADDSAT}) 147 for (auto T : {MVT::v16i8, MVT::v8i16}) 148 setOperationAction(Op, T, Legal); 149 150 // Support integer abs 151 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) 152 setOperationAction(ISD::ABS, T, Legal); 153 154 // Custom lower BUILD_VECTORs to minimize number of replace_lanes 155 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64, 156 MVT::v2f64}) 157 setOperationAction(ISD::BUILD_VECTOR, T, Custom); 158 159 // We have custom shuffle lowering to expose the shuffle mask 160 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64, 161 MVT::v2f64}) 162 setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom); 163 164 // Custom lowering since wasm shifts must have a scalar shift amount 165 for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL}) 166 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) 167 setOperationAction(Op, T, Custom); 168 169 // Custom lower lane accesses to expand out variable indices 170 for (auto Op : {ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT}) 171 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64, 172 MVT::v2f64}) 173 setOperationAction(Op, T, Custom); 174 175 // There is no i8x16.mul instruction 176 setOperationAction(ISD::MUL, MVT::v16i8, Expand); 177 178 // There is no vector conditional select instruction 179 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64, 180 MVT::v2f64}) 181 setOperationAction(ISD::SELECT_CC, T, Expand); 182 183 // Expand integer operations supported for scalars but not SIMD 184 for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP, ISD::SDIV, ISD::UDIV, 185 ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR}) 186 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) 187 setOperationAction(Op, T, Expand); 188 189 // But we do have integer min and max operations 190 for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) 191 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) 192 setOperationAction(Op, T, Legal); 193 194 // Expand float operations supported for scalars but not SIMD 195 for (auto Op : {ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, 196 ISD::FEXP, ISD::FEXP2, ISD::FRINT}) 197 for (auto T : {MVT::v4f32, MVT::v2f64}) 198 setOperationAction(Op, T, Expand); 199 200 // Unsigned comparison operations are unavailable for i64x2 vectors. 201 for (auto CC : {ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE}) 202 setCondCodeAction(CC, MVT::v2i64, Custom); 203 204 // 64x2 conversions are not in the spec 205 for (auto Op : 206 {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT}) 207 for (auto T : {MVT::v2i64, MVT::v2f64}) 208 setOperationAction(Op, T, Expand); 209 210 // But saturating fp_to_int converstions are 211 for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}) 212 setOperationAction(Op, MVT::v4i32, Custom); 213 } 214 215 // As a special case, these operators use the type to mean the type to 216 // sign-extend from. 217 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 218 if (!Subtarget->hasSignExt()) { 219 // Sign extends are legal only when extending a vector extract 220 auto Action = Subtarget->hasSIMD128() ? Custom : Expand; 221 for (auto T : {MVT::i8, MVT::i16, MVT::i32}) 222 setOperationAction(ISD::SIGN_EXTEND_INREG, T, Action); 223 } 224 for (auto T : MVT::integer_fixedlen_vector_valuetypes()) 225 setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand); 226 227 // Dynamic stack allocation: use the default expansion. 228 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); 229 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); 230 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand); 231 232 setOperationAction(ISD::FrameIndex, MVT::i32, Custom); 233 setOperationAction(ISD::FrameIndex, MVT::i64, Custom); 234 setOperationAction(ISD::CopyToReg, MVT::Other, Custom); 235 236 // Expand these forms; we pattern-match the forms that we can handle in isel. 237 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) 238 for (auto Op : {ISD::BR_CC, ISD::SELECT_CC}) 239 setOperationAction(Op, T, Expand); 240 241 // We have custom switch handling. 242 setOperationAction(ISD::BR_JT, MVT::Other, Custom); 243 244 // WebAssembly doesn't have: 245 // - Floating-point extending loads. 246 // - Floating-point truncating stores. 247 // - i1 extending loads. 248 // - truncating SIMD stores and most extending loads 249 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); 250 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 251 for (auto T : MVT::integer_valuetypes()) 252 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD}) 253 setLoadExtAction(Ext, T, MVT::i1, Promote); 254 if (Subtarget->hasSIMD128()) { 255 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, 256 MVT::v2f64}) { 257 for (auto MemT : MVT::fixedlen_vector_valuetypes()) { 258 if (MVT(T) != MemT) { 259 setTruncStoreAction(T, MemT, Expand); 260 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD}) 261 setLoadExtAction(Ext, T, MemT, Expand); 262 } 263 } 264 } 265 // But some vector extending loads are legal 266 for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) { 267 setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal); 268 setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal); 269 setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal); 270 } 271 // And some truncating stores are legal as well 272 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal); 273 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal); 274 } 275 276 // Don't do anything clever with build_pairs 277 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); 278 279 // Trap lowers to wasm unreachable 280 setOperationAction(ISD::TRAP, MVT::Other, Legal); 281 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); 282 283 // Exception handling intrinsics 284 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 285 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); 286 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); 287 288 setMaxAtomicSizeInBitsSupported(64); 289 290 // Override the __gnu_f2h_ieee/__gnu_h2f_ieee names so that the f32 name is 291 // consistent with the f64 and f128 names. 292 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2"); 293 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2"); 294 295 // Define the emscripten name for return address helper. 296 // TODO: when implementing other Wasm backends, make this generic or only do 297 // this on emscripten depending on what they end up doing. 298 setLibcallName(RTLIB::RETURN_ADDRESS, "emscripten_return_address"); 299 300 // Always convert switches to br_tables unless there is only one case, which 301 // is equivalent to a simple branch. This reduces code size for wasm, and we 302 // defer possible jump table optimizations to the VM. 303 setMinimumJumpTableEntries(2); 304 } 305 306 TargetLowering::AtomicExpansionKind 307 WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { 308 // We have wasm instructions for these 309 switch (AI->getOperation()) { 310 case AtomicRMWInst::Add: 311 case AtomicRMWInst::Sub: 312 case AtomicRMWInst::And: 313 case AtomicRMWInst::Or: 314 case AtomicRMWInst::Xor: 315 case AtomicRMWInst::Xchg: 316 return AtomicExpansionKind::None; 317 default: 318 break; 319 } 320 return AtomicExpansionKind::CmpXChg; 321 } 322 323 FastISel *WebAssemblyTargetLowering::createFastISel( 324 FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const { 325 return WebAssembly::createFastISel(FuncInfo, LibInfo); 326 } 327 328 MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/, 329 EVT VT) const { 330 unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1); 331 if (BitWidth > 1 && BitWidth < 8) 332 BitWidth = 8; 333 334 if (BitWidth > 64) { 335 // The shift will be lowered to a libcall, and compiler-rt libcalls expect 336 // the count to be an i32. 337 BitWidth = 32; 338 assert(BitWidth >= Log2_32_Ceil(VT.getSizeInBits()) && 339 "32-bit shift counts ought to be enough for anyone"); 340 } 341 342 MVT Result = MVT::getIntegerVT(BitWidth); 343 assert(Result != MVT::INVALID_SIMPLE_VALUE_TYPE && 344 "Unable to represent scalar shift amount type"); 345 return Result; 346 } 347 348 // Lower an fp-to-int conversion operator from the LLVM opcode, which has an 349 // undefined result on invalid/overflow, to the WebAssembly opcode, which 350 // traps on invalid/overflow. 351 static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL, 352 MachineBasicBlock *BB, 353 const TargetInstrInfo &TII, 354 bool IsUnsigned, bool Int64, 355 bool Float64, unsigned LoweredOpcode) { 356 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 357 358 Register OutReg = MI.getOperand(0).getReg(); 359 Register InReg = MI.getOperand(1).getReg(); 360 361 unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32; 362 unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32; 363 unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32; 364 unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32; 365 unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32; 366 unsigned Eqz = WebAssembly::EQZ_I32; 367 unsigned And = WebAssembly::AND_I32; 368 int64_t Limit = Int64 ? INT64_MIN : INT32_MIN; 369 int64_t Substitute = IsUnsigned ? 0 : Limit; 370 double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit; 371 auto &Context = BB->getParent()->getFunction().getContext(); 372 Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context); 373 374 const BasicBlock *LLVMBB = BB->getBasicBlock(); 375 MachineFunction *F = BB->getParent(); 376 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB); 377 MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB); 378 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB); 379 380 MachineFunction::iterator It = ++BB->getIterator(); 381 F->insert(It, FalseMBB); 382 F->insert(It, TrueMBB); 383 F->insert(It, DoneMBB); 384 385 // Transfer the remainder of BB and its successor edges to DoneMBB. 386 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end()); 387 DoneMBB->transferSuccessorsAndUpdatePHIs(BB); 388 389 BB->addSuccessor(TrueMBB); 390 BB->addSuccessor(FalseMBB); 391 TrueMBB->addSuccessor(DoneMBB); 392 FalseMBB->addSuccessor(DoneMBB); 393 394 unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg; 395 Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg)); 396 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg)); 397 CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); 398 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); 399 FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg)); 400 TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg)); 401 402 MI.eraseFromParent(); 403 // For signed numbers, we can do a single comparison to determine whether 404 // fabs(x) is within range. 405 if (IsUnsigned) { 406 Tmp0 = InReg; 407 } else { 408 BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg); 409 } 410 BuildMI(BB, DL, TII.get(FConst), Tmp1) 411 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal))); 412 BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1); 413 414 // For unsigned numbers, we have to do a separate comparison with zero. 415 if (IsUnsigned) { 416 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg)); 417 Register SecondCmpReg = 418 MRI.createVirtualRegister(&WebAssembly::I32RegClass); 419 Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); 420 BuildMI(BB, DL, TII.get(FConst), Tmp1) 421 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0))); 422 BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1); 423 BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg); 424 CmpReg = AndReg; 425 } 426 427 BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg); 428 429 // Create the CFG diamond to select between doing the conversion or using 430 // the substitute value. 431 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg); 432 BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg); 433 BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB); 434 BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute); 435 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg) 436 .addReg(FalseReg) 437 .addMBB(FalseMBB) 438 .addReg(TrueReg) 439 .addMBB(TrueMBB); 440 441 return DoneMBB; 442 } 443 444 static MachineBasicBlock * 445 LowerCallResults(MachineInstr &CallResults, DebugLoc DL, MachineBasicBlock *BB, 446 const WebAssemblySubtarget *Subtarget, 447 const TargetInstrInfo &TII) { 448 MachineInstr &CallParams = *CallResults.getPrevNode(); 449 assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS); 450 assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS || 451 CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS); 452 453 bool IsIndirect = CallParams.getOperand(0).isReg(); 454 bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS; 455 456 unsigned CallOp; 457 if (IsIndirect && IsRetCall) { 458 CallOp = WebAssembly::RET_CALL_INDIRECT; 459 } else if (IsIndirect) { 460 CallOp = WebAssembly::CALL_INDIRECT; 461 } else if (IsRetCall) { 462 CallOp = WebAssembly::RET_CALL; 463 } else { 464 CallOp = WebAssembly::CALL; 465 } 466 467 MachineFunction &MF = *BB->getParent(); 468 const MCInstrDesc &MCID = TII.get(CallOp); 469 MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL)); 470 471 // See if we must truncate the function pointer. 472 // CALL_INDIRECT takes an i32, but in wasm64 we represent function pointers 473 // as 64-bit for uniformity with other pointer types. 474 // See also: WebAssemblyFastISel::selectCall 475 if (IsIndirect && MF.getSubtarget<WebAssemblySubtarget>().hasAddr64()) { 476 Register Reg32 = 477 MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass); 478 auto &FnPtr = CallParams.getOperand(0); 479 BuildMI(*BB, CallResults.getIterator(), DL, 480 TII.get(WebAssembly::I32_WRAP_I64), Reg32) 481 .addReg(FnPtr.getReg()); 482 FnPtr.setReg(Reg32); 483 } 484 485 // Move the function pointer to the end of the arguments for indirect calls 486 if (IsIndirect) { 487 auto FnPtr = CallParams.getOperand(0); 488 CallParams.RemoveOperand(0); 489 CallParams.addOperand(FnPtr); 490 } 491 492 for (auto Def : CallResults.defs()) 493 MIB.add(Def); 494 495 if (IsIndirect) { 496 // Placeholder for the type index. 497 MIB.addImm(0); 498 // The table into which this call_indirect indexes. 499 MCSymbolWasm *Table = 500 WebAssembly::getOrCreateFunctionTableSymbol(MF.getContext(), Subtarget); 501 if (Subtarget->hasReferenceTypes()) { 502 MIB.addSym(Table); 503 } else { 504 // For the MVP there is at most one table whose number is 0, but we can't 505 // write a table symbol or issue relocations. Instead we just ensure the 506 // table is live and write a zero. 507 Table->setNoStrip(); 508 MIB.addImm(0); 509 } 510 } 511 512 for (auto Use : CallParams.uses()) 513 MIB.add(Use); 514 515 BB->insert(CallResults.getIterator(), MIB); 516 CallParams.eraseFromParent(); 517 CallResults.eraseFromParent(); 518 519 return BB; 520 } 521 522 MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter( 523 MachineInstr &MI, MachineBasicBlock *BB) const { 524 const TargetInstrInfo &TII = *Subtarget->getInstrInfo(); 525 DebugLoc DL = MI.getDebugLoc(); 526 527 switch (MI.getOpcode()) { 528 default: 529 llvm_unreachable("Unexpected instr type to insert"); 530 case WebAssembly::FP_TO_SINT_I32_F32: 531 return LowerFPToInt(MI, DL, BB, TII, false, false, false, 532 WebAssembly::I32_TRUNC_S_F32); 533 case WebAssembly::FP_TO_UINT_I32_F32: 534 return LowerFPToInt(MI, DL, BB, TII, true, false, false, 535 WebAssembly::I32_TRUNC_U_F32); 536 case WebAssembly::FP_TO_SINT_I64_F32: 537 return LowerFPToInt(MI, DL, BB, TII, false, true, false, 538 WebAssembly::I64_TRUNC_S_F32); 539 case WebAssembly::FP_TO_UINT_I64_F32: 540 return LowerFPToInt(MI, DL, BB, TII, true, true, false, 541 WebAssembly::I64_TRUNC_U_F32); 542 case WebAssembly::FP_TO_SINT_I32_F64: 543 return LowerFPToInt(MI, DL, BB, TII, false, false, true, 544 WebAssembly::I32_TRUNC_S_F64); 545 case WebAssembly::FP_TO_UINT_I32_F64: 546 return LowerFPToInt(MI, DL, BB, TII, true, false, true, 547 WebAssembly::I32_TRUNC_U_F64); 548 case WebAssembly::FP_TO_SINT_I64_F64: 549 return LowerFPToInt(MI, DL, BB, TII, false, true, true, 550 WebAssembly::I64_TRUNC_S_F64); 551 case WebAssembly::FP_TO_UINT_I64_F64: 552 return LowerFPToInt(MI, DL, BB, TII, true, true, true, 553 WebAssembly::I64_TRUNC_U_F64); 554 case WebAssembly::CALL_RESULTS: 555 case WebAssembly::RET_CALL_RESULTS: 556 return LowerCallResults(MI, DL, BB, Subtarget, TII); 557 } 558 } 559 560 const char * 561 WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const { 562 switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) { 563 case WebAssemblyISD::FIRST_NUMBER: 564 case WebAssemblyISD::FIRST_MEM_OPCODE: 565 break; 566 #define HANDLE_NODETYPE(NODE) \ 567 case WebAssemblyISD::NODE: \ 568 return "WebAssemblyISD::" #NODE; 569 #define HANDLE_MEM_NODETYPE(NODE) HANDLE_NODETYPE(NODE) 570 #include "WebAssemblyISD.def" 571 #undef HANDLE_MEM_NODETYPE 572 #undef HANDLE_NODETYPE 573 } 574 return nullptr; 575 } 576 577 std::pair<unsigned, const TargetRegisterClass *> 578 WebAssemblyTargetLowering::getRegForInlineAsmConstraint( 579 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { 580 // First, see if this is a constraint that directly corresponds to a 581 // WebAssembly register class. 582 if (Constraint.size() == 1) { 583 switch (Constraint[0]) { 584 case 'r': 585 assert(VT != MVT::iPTR && "Pointer MVT not expected here"); 586 if (Subtarget->hasSIMD128() && VT.isVector()) { 587 if (VT.getSizeInBits() == 128) 588 return std::make_pair(0U, &WebAssembly::V128RegClass); 589 } 590 if (VT.isInteger() && !VT.isVector()) { 591 if (VT.getSizeInBits() <= 32) 592 return std::make_pair(0U, &WebAssembly::I32RegClass); 593 if (VT.getSizeInBits() <= 64) 594 return std::make_pair(0U, &WebAssembly::I64RegClass); 595 } 596 if (VT.isFloatingPoint() && !VT.isVector()) { 597 switch (VT.getSizeInBits()) { 598 case 32: 599 return std::make_pair(0U, &WebAssembly::F32RegClass); 600 case 64: 601 return std::make_pair(0U, &WebAssembly::F64RegClass); 602 default: 603 break; 604 } 605 } 606 break; 607 default: 608 break; 609 } 610 } 611 612 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 613 } 614 615 bool WebAssemblyTargetLowering::isCheapToSpeculateCttz() const { 616 // Assume ctz is a relatively cheap operation. 617 return true; 618 } 619 620 bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const { 621 // Assume clz is a relatively cheap operation. 622 return true; 623 } 624 625 bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL, 626 const AddrMode &AM, 627 Type *Ty, unsigned AS, 628 Instruction *I) const { 629 // WebAssembly offsets are added as unsigned without wrapping. The 630 // isLegalAddressingMode gives us no way to determine if wrapping could be 631 // happening, so we approximate this by accepting only non-negative offsets. 632 if (AM.BaseOffs < 0) 633 return false; 634 635 // WebAssembly has no scale register operands. 636 if (AM.Scale != 0) 637 return false; 638 639 // Everything else is legal. 640 return true; 641 } 642 643 bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses( 644 EVT /*VT*/, unsigned /*AddrSpace*/, Align /*Align*/, 645 MachineMemOperand::Flags /*Flags*/, bool *Fast) const { 646 // WebAssembly supports unaligned accesses, though it should be declared 647 // with the p2align attribute on loads and stores which do so, and there 648 // may be a performance impact. We tell LLVM they're "fast" because 649 // for the kinds of things that LLVM uses this for (merging adjacent stores 650 // of constants, etc.), WebAssembly implementations will either want the 651 // unaligned access or they'll split anyway. 652 if (Fast) 653 *Fast = true; 654 return true; 655 } 656 657 bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT, 658 AttributeList Attr) const { 659 // The current thinking is that wasm engines will perform this optimization, 660 // so we can save on code size. 661 return true; 662 } 663 664 bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const { 665 EVT ExtT = ExtVal.getValueType(); 666 EVT MemT = cast<LoadSDNode>(ExtVal->getOperand(0))->getValueType(0); 667 return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) || 668 (ExtT == MVT::v4i32 && MemT == MVT::v4i16) || 669 (ExtT == MVT::v2i64 && MemT == MVT::v2i32); 670 } 671 672 EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL, 673 LLVMContext &C, 674 EVT VT) const { 675 if (VT.isVector()) 676 return VT.changeVectorElementTypeToInteger(); 677 678 // So far, all branch instructions in Wasm take an I32 condition. 679 // The default TargetLowering::getSetCCResultType returns the pointer size, 680 // which would be useful to reduce instruction counts when testing 681 // against 64-bit pointers/values if at some point Wasm supports that. 682 return EVT::getIntegerVT(C, 32); 683 } 684 685 bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 686 const CallInst &I, 687 MachineFunction &MF, 688 unsigned Intrinsic) const { 689 switch (Intrinsic) { 690 case Intrinsic::wasm_memory_atomic_notify: 691 Info.opc = ISD::INTRINSIC_W_CHAIN; 692 Info.memVT = MVT::i32; 693 Info.ptrVal = I.getArgOperand(0); 694 Info.offset = 0; 695 Info.align = Align(4); 696 // atomic.notify instruction does not really load the memory specified with 697 // this argument, but MachineMemOperand should either be load or store, so 698 // we set this to a load. 699 // FIXME Volatile isn't really correct, but currently all LLVM atomic 700 // instructions are treated as volatiles in the backend, so we should be 701 // consistent. The same applies for wasm_atomic_wait intrinsics too. 702 Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad; 703 return true; 704 case Intrinsic::wasm_memory_atomic_wait32: 705 Info.opc = ISD::INTRINSIC_W_CHAIN; 706 Info.memVT = MVT::i32; 707 Info.ptrVal = I.getArgOperand(0); 708 Info.offset = 0; 709 Info.align = Align(4); 710 Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad; 711 return true; 712 case Intrinsic::wasm_memory_atomic_wait64: 713 Info.opc = ISD::INTRINSIC_W_CHAIN; 714 Info.memVT = MVT::i64; 715 Info.ptrVal = I.getArgOperand(0); 716 Info.offset = 0; 717 Info.align = Align(8); 718 Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad; 719 return true; 720 case Intrinsic::wasm_load32_zero: 721 case Intrinsic::wasm_load64_zero: 722 Info.opc = ISD::INTRINSIC_W_CHAIN; 723 Info.memVT = Intrinsic == Intrinsic::wasm_load32_zero ? MVT::i32 : MVT::i64; 724 Info.ptrVal = I.getArgOperand(0); 725 Info.offset = 0; 726 Info.align = Info.memVT == MVT::i32 ? Align(4) : Align(8); 727 Info.flags = MachineMemOperand::MOLoad; 728 return true; 729 case Intrinsic::wasm_load8_lane: 730 case Intrinsic::wasm_load16_lane: 731 case Intrinsic::wasm_load32_lane: 732 case Intrinsic::wasm_load64_lane: 733 case Intrinsic::wasm_store8_lane: 734 case Intrinsic::wasm_store16_lane: 735 case Intrinsic::wasm_store32_lane: 736 case Intrinsic::wasm_store64_lane: { 737 MVT MemVT; 738 Align MemAlign; 739 switch (Intrinsic) { 740 case Intrinsic::wasm_load8_lane: 741 case Intrinsic::wasm_store8_lane: 742 MemVT = MVT::i8; 743 MemAlign = Align(1); 744 break; 745 case Intrinsic::wasm_load16_lane: 746 case Intrinsic::wasm_store16_lane: 747 MemVT = MVT::i16; 748 MemAlign = Align(2); 749 break; 750 case Intrinsic::wasm_load32_lane: 751 case Intrinsic::wasm_store32_lane: 752 MemVT = MVT::i32; 753 MemAlign = Align(4); 754 break; 755 case Intrinsic::wasm_load64_lane: 756 case Intrinsic::wasm_store64_lane: 757 MemVT = MVT::i64; 758 MemAlign = Align(8); 759 break; 760 default: 761 llvm_unreachable("unexpected intrinsic"); 762 } 763 if (Intrinsic == Intrinsic::wasm_load8_lane || 764 Intrinsic == Intrinsic::wasm_load16_lane || 765 Intrinsic == Intrinsic::wasm_load32_lane || 766 Intrinsic == Intrinsic::wasm_load64_lane) { 767 Info.opc = ISD::INTRINSIC_W_CHAIN; 768 Info.flags = MachineMemOperand::MOLoad; 769 } else { 770 Info.opc = ISD::INTRINSIC_VOID; 771 Info.flags = MachineMemOperand::MOStore; 772 } 773 Info.ptrVal = I.getArgOperand(0); 774 Info.memVT = MemVT; 775 Info.offset = 0; 776 Info.align = MemAlign; 777 return true; 778 } 779 default: 780 return false; 781 } 782 } 783 784 //===----------------------------------------------------------------------===// 785 // WebAssembly Lowering private implementation. 786 //===----------------------------------------------------------------------===// 787 788 //===----------------------------------------------------------------------===// 789 // Lowering Code 790 //===----------------------------------------------------------------------===// 791 792 static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) { 793 MachineFunction &MF = DAG.getMachineFunction(); 794 DAG.getContext()->diagnose( 795 DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc())); 796 } 797 798 // Test whether the given calling convention is supported. 799 static bool callingConvSupported(CallingConv::ID CallConv) { 800 // We currently support the language-independent target-independent 801 // conventions. We don't yet have a way to annotate calls with properties like 802 // "cold", and we don't have any call-clobbered registers, so these are mostly 803 // all handled the same. 804 return CallConv == CallingConv::C || CallConv == CallingConv::Fast || 805 CallConv == CallingConv::Cold || 806 CallConv == CallingConv::PreserveMost || 807 CallConv == CallingConv::PreserveAll || 808 CallConv == CallingConv::CXX_FAST_TLS || 809 CallConv == CallingConv::WASM_EmscriptenInvoke || 810 CallConv == CallingConv::Swift; 811 } 812 813 SDValue 814 WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, 815 SmallVectorImpl<SDValue> &InVals) const { 816 SelectionDAG &DAG = CLI.DAG; 817 SDLoc DL = CLI.DL; 818 SDValue Chain = CLI.Chain; 819 SDValue Callee = CLI.Callee; 820 MachineFunction &MF = DAG.getMachineFunction(); 821 auto Layout = MF.getDataLayout(); 822 823 CallingConv::ID CallConv = CLI.CallConv; 824 if (!callingConvSupported(CallConv)) 825 fail(DL, DAG, 826 "WebAssembly doesn't support language-specific or target-specific " 827 "calling conventions yet"); 828 if (CLI.IsPatchPoint) 829 fail(DL, DAG, "WebAssembly doesn't support patch point yet"); 830 831 if (CLI.IsTailCall) { 832 auto NoTail = [&](const char *Msg) { 833 if (CLI.CB && CLI.CB->isMustTailCall()) 834 fail(DL, DAG, Msg); 835 CLI.IsTailCall = false; 836 }; 837 838 if (!Subtarget->hasTailCall()) 839 NoTail("WebAssembly 'tail-call' feature not enabled"); 840 841 // Varargs calls cannot be tail calls because the buffer is on the stack 842 if (CLI.IsVarArg) 843 NoTail("WebAssembly does not support varargs tail calls"); 844 845 // Do not tail call unless caller and callee return types match 846 const Function &F = MF.getFunction(); 847 const TargetMachine &TM = getTargetMachine(); 848 Type *RetTy = F.getReturnType(); 849 SmallVector<MVT, 4> CallerRetTys; 850 SmallVector<MVT, 4> CalleeRetTys; 851 computeLegalValueVTs(F, TM, RetTy, CallerRetTys); 852 computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys); 853 bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() && 854 std::equal(CallerRetTys.begin(), CallerRetTys.end(), 855 CalleeRetTys.begin()); 856 if (!TypesMatch) 857 NoTail("WebAssembly tail call requires caller and callee return types to " 858 "match"); 859 860 // If pointers to local stack values are passed, we cannot tail call 861 if (CLI.CB) { 862 for (auto &Arg : CLI.CB->args()) { 863 Value *Val = Arg.get(); 864 // Trace the value back through pointer operations 865 while (true) { 866 Value *Src = Val->stripPointerCastsAndAliases(); 867 if (auto *GEP = dyn_cast<GetElementPtrInst>(Src)) 868 Src = GEP->getPointerOperand(); 869 if (Val == Src) 870 break; 871 Val = Src; 872 } 873 if (isa<AllocaInst>(Val)) { 874 NoTail( 875 "WebAssembly does not support tail calling with stack arguments"); 876 break; 877 } 878 } 879 } 880 } 881 882 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 883 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 884 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 885 886 // The generic code may have added an sret argument. If we're lowering an 887 // invoke function, the ABI requires that the function pointer be the first 888 // argument, so we may have to swap the arguments. 889 if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 && 890 Outs[0].Flags.isSRet()) { 891 std::swap(Outs[0], Outs[1]); 892 std::swap(OutVals[0], OutVals[1]); 893 } 894 895 bool HasSwiftSelfArg = false; 896 bool HasSwiftErrorArg = false; 897 unsigned NumFixedArgs = 0; 898 for (unsigned I = 0; I < Outs.size(); ++I) { 899 const ISD::OutputArg &Out = Outs[I]; 900 SDValue &OutVal = OutVals[I]; 901 HasSwiftSelfArg |= Out.Flags.isSwiftSelf(); 902 HasSwiftErrorArg |= Out.Flags.isSwiftError(); 903 if (Out.Flags.isNest()) 904 fail(DL, DAG, "WebAssembly hasn't implemented nest arguments"); 905 if (Out.Flags.isInAlloca()) 906 fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments"); 907 if (Out.Flags.isInConsecutiveRegs()) 908 fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments"); 909 if (Out.Flags.isInConsecutiveRegsLast()) 910 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments"); 911 if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) { 912 auto &MFI = MF.getFrameInfo(); 913 int FI = MFI.CreateStackObject(Out.Flags.getByValSize(), 914 Out.Flags.getNonZeroByValAlign(), 915 /*isSS=*/false); 916 SDValue SizeNode = 917 DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32); 918 SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout)); 919 Chain = DAG.getMemcpy( 920 Chain, DL, FINode, OutVal, SizeNode, Out.Flags.getNonZeroByValAlign(), 921 /*isVolatile*/ false, /*AlwaysInline=*/false, 922 /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo()); 923 OutVal = FINode; 924 } 925 // Count the number of fixed args *after* legalization. 926 NumFixedArgs += Out.IsFixed; 927 } 928 929 bool IsVarArg = CLI.IsVarArg; 930 auto PtrVT = getPointerTy(Layout); 931 932 // For swiftcc, emit additional swiftself and swifterror arguments 933 // if there aren't. These additional arguments are also added for callee 934 // signature They are necessary to match callee and caller signature for 935 // indirect call. 936 if (CallConv == CallingConv::Swift) { 937 if (!HasSwiftSelfArg) { 938 NumFixedArgs++; 939 ISD::OutputArg Arg; 940 Arg.Flags.setSwiftSelf(); 941 CLI.Outs.push_back(Arg); 942 SDValue ArgVal = DAG.getUNDEF(PtrVT); 943 CLI.OutVals.push_back(ArgVal); 944 } 945 if (!HasSwiftErrorArg) { 946 NumFixedArgs++; 947 ISD::OutputArg Arg; 948 Arg.Flags.setSwiftError(); 949 CLI.Outs.push_back(Arg); 950 SDValue ArgVal = DAG.getUNDEF(PtrVT); 951 CLI.OutVals.push_back(ArgVal); 952 } 953 } 954 955 // Analyze operands of the call, assigning locations to each operand. 956 SmallVector<CCValAssign, 16> ArgLocs; 957 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 958 959 if (IsVarArg) { 960 // Outgoing non-fixed arguments are placed in a buffer. First 961 // compute their offsets and the total amount of buffer space needed. 962 for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) { 963 const ISD::OutputArg &Out = Outs[I]; 964 SDValue &Arg = OutVals[I]; 965 EVT VT = Arg.getValueType(); 966 assert(VT != MVT::iPTR && "Legalized args should be concrete"); 967 Type *Ty = VT.getTypeForEVT(*DAG.getContext()); 968 Align Alignment = 969 std::max(Out.Flags.getNonZeroOrigAlign(), Layout.getABITypeAlign(Ty)); 970 unsigned Offset = 971 CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), Alignment); 972 CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(), 973 Offset, VT.getSimpleVT(), 974 CCValAssign::Full)); 975 } 976 } 977 978 unsigned NumBytes = CCInfo.getAlignedCallFrameSize(); 979 980 SDValue FINode; 981 if (IsVarArg && NumBytes) { 982 // For non-fixed arguments, next emit stores to store the argument values 983 // to the stack buffer at the offsets computed above. 984 int FI = MF.getFrameInfo().CreateStackObject(NumBytes, 985 Layout.getStackAlignment(), 986 /*isSS=*/false); 987 unsigned ValNo = 0; 988 SmallVector<SDValue, 8> Chains; 989 for (SDValue Arg : drop_begin(OutVals, NumFixedArgs)) { 990 assert(ArgLocs[ValNo].getValNo() == ValNo && 991 "ArgLocs should remain in order and only hold varargs args"); 992 unsigned Offset = ArgLocs[ValNo++].getLocMemOffset(); 993 FINode = DAG.getFrameIndex(FI, getPointerTy(Layout)); 994 SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode, 995 DAG.getConstant(Offset, DL, PtrVT)); 996 Chains.push_back( 997 DAG.getStore(Chain, DL, Arg, Add, 998 MachinePointerInfo::getFixedStack(MF, FI, Offset))); 999 } 1000 if (!Chains.empty()) 1001 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); 1002 } else if (IsVarArg) { 1003 FINode = DAG.getIntPtrConstant(0, DL); 1004 } 1005 1006 if (Callee->getOpcode() == ISD::GlobalAddress) { 1007 // If the callee is a GlobalAddress node (quite common, every direct call 1008 // is) turn it into a TargetGlobalAddress node so that LowerGlobalAddress 1009 // doesn't at MO_GOT which is not needed for direct calls. 1010 GlobalAddressSDNode* GA = cast<GlobalAddressSDNode>(Callee); 1011 Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), DL, 1012 getPointerTy(DAG.getDataLayout()), 1013 GA->getOffset()); 1014 Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL, 1015 getPointerTy(DAG.getDataLayout()), Callee); 1016 } 1017 1018 // Compute the operands for the CALLn node. 1019 SmallVector<SDValue, 16> Ops; 1020 Ops.push_back(Chain); 1021 Ops.push_back(Callee); 1022 1023 // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs 1024 // isn't reliable. 1025 Ops.append(OutVals.begin(), 1026 IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end()); 1027 // Add a pointer to the vararg buffer. 1028 if (IsVarArg) 1029 Ops.push_back(FINode); 1030 1031 SmallVector<EVT, 8> InTys; 1032 for (const auto &In : Ins) { 1033 assert(!In.Flags.isByVal() && "byval is not valid for return values"); 1034 assert(!In.Flags.isNest() && "nest is not valid for return values"); 1035 if (In.Flags.isInAlloca()) 1036 fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values"); 1037 if (In.Flags.isInConsecutiveRegs()) 1038 fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values"); 1039 if (In.Flags.isInConsecutiveRegsLast()) 1040 fail(DL, DAG, 1041 "WebAssembly hasn't implemented cons regs last return values"); 1042 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in 1043 // registers. 1044 InTys.push_back(In.VT); 1045 } 1046 1047 if (CLI.IsTailCall) { 1048 // ret_calls do not return values to the current frame 1049 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 1050 return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops); 1051 } 1052 1053 InTys.push_back(MVT::Other); 1054 SDVTList InTyList = DAG.getVTList(InTys); 1055 SDValue Res = DAG.getNode(WebAssemblyISD::CALL, DL, InTyList, Ops); 1056 1057 for (size_t I = 0; I < Ins.size(); ++I) 1058 InVals.push_back(Res.getValue(I)); 1059 1060 // Return the chain 1061 return Res.getValue(Ins.size()); 1062 } 1063 1064 bool WebAssemblyTargetLowering::CanLowerReturn( 1065 CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/, 1066 const SmallVectorImpl<ISD::OutputArg> &Outs, 1067 LLVMContext & /*Context*/) const { 1068 // WebAssembly can only handle returning tuples with multivalue enabled 1069 return Subtarget->hasMultivalue() || Outs.size() <= 1; 1070 } 1071 1072 SDValue WebAssemblyTargetLowering::LowerReturn( 1073 SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/, 1074 const SmallVectorImpl<ISD::OutputArg> &Outs, 1075 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, 1076 SelectionDAG &DAG) const { 1077 assert((Subtarget->hasMultivalue() || Outs.size() <= 1) && 1078 "MVP WebAssembly can only return up to one value"); 1079 if (!callingConvSupported(CallConv)) 1080 fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions"); 1081 1082 SmallVector<SDValue, 4> RetOps(1, Chain); 1083 RetOps.append(OutVals.begin(), OutVals.end()); 1084 Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps); 1085 1086 // Record the number and types of the return values. 1087 for (const ISD::OutputArg &Out : Outs) { 1088 assert(!Out.Flags.isByVal() && "byval is not valid for return values"); 1089 assert(!Out.Flags.isNest() && "nest is not valid for return values"); 1090 assert(Out.IsFixed && "non-fixed return value is not valid"); 1091 if (Out.Flags.isInAlloca()) 1092 fail(DL, DAG, "WebAssembly hasn't implemented inalloca results"); 1093 if (Out.Flags.isInConsecutiveRegs()) 1094 fail(DL, DAG, "WebAssembly hasn't implemented cons regs results"); 1095 if (Out.Flags.isInConsecutiveRegsLast()) 1096 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results"); 1097 } 1098 1099 return Chain; 1100 } 1101 1102 SDValue WebAssemblyTargetLowering::LowerFormalArguments( 1103 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, 1104 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, 1105 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 1106 if (!callingConvSupported(CallConv)) 1107 fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions"); 1108 1109 MachineFunction &MF = DAG.getMachineFunction(); 1110 auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>(); 1111 1112 // Set up the incoming ARGUMENTS value, which serves to represent the liveness 1113 // of the incoming values before they're represented by virtual registers. 1114 MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS); 1115 1116 bool HasSwiftErrorArg = false; 1117 bool HasSwiftSelfArg = false; 1118 for (const ISD::InputArg &In : Ins) { 1119 HasSwiftSelfArg |= In.Flags.isSwiftSelf(); 1120 HasSwiftErrorArg |= In.Flags.isSwiftError(); 1121 if (In.Flags.isInAlloca()) 1122 fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments"); 1123 if (In.Flags.isNest()) 1124 fail(DL, DAG, "WebAssembly hasn't implemented nest arguments"); 1125 if (In.Flags.isInConsecutiveRegs()) 1126 fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments"); 1127 if (In.Flags.isInConsecutiveRegsLast()) 1128 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments"); 1129 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in 1130 // registers. 1131 InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT, 1132 DAG.getTargetConstant(InVals.size(), 1133 DL, MVT::i32)) 1134 : DAG.getUNDEF(In.VT)); 1135 1136 // Record the number and types of arguments. 1137 MFI->addParam(In.VT); 1138 } 1139 1140 // For swiftcc, emit additional swiftself and swifterror arguments 1141 // if there aren't. These additional arguments are also added for callee 1142 // signature They are necessary to match callee and caller signature for 1143 // indirect call. 1144 auto PtrVT = getPointerTy(MF.getDataLayout()); 1145 if (CallConv == CallingConv::Swift) { 1146 if (!HasSwiftSelfArg) { 1147 MFI->addParam(PtrVT); 1148 } 1149 if (!HasSwiftErrorArg) { 1150 MFI->addParam(PtrVT); 1151 } 1152 } 1153 // Varargs are copied into a buffer allocated by the caller, and a pointer to 1154 // the buffer is passed as an argument. 1155 if (IsVarArg) { 1156 MVT PtrVT = getPointerTy(MF.getDataLayout()); 1157 Register VarargVreg = 1158 MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrVT)); 1159 MFI->setVarargBufferVreg(VarargVreg); 1160 Chain = DAG.getCopyToReg( 1161 Chain, DL, VarargVreg, 1162 DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT, 1163 DAG.getTargetConstant(Ins.size(), DL, MVT::i32))); 1164 MFI->addParam(PtrVT); 1165 } 1166 1167 // Record the number and types of arguments and results. 1168 SmallVector<MVT, 4> Params; 1169 SmallVector<MVT, 4> Results; 1170 computeSignatureVTs(MF.getFunction().getFunctionType(), &MF.getFunction(), 1171 MF.getFunction(), DAG.getTarget(), Params, Results); 1172 for (MVT VT : Results) 1173 MFI->addResult(VT); 1174 // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify 1175 // the param logic here with ComputeSignatureVTs 1176 assert(MFI->getParams().size() == Params.size() && 1177 std::equal(MFI->getParams().begin(), MFI->getParams().end(), 1178 Params.begin())); 1179 1180 return Chain; 1181 } 1182 1183 void WebAssemblyTargetLowering::ReplaceNodeResults( 1184 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { 1185 switch (N->getOpcode()) { 1186 case ISD::SIGN_EXTEND_INREG: 1187 // Do not add any results, signifying that N should not be custom lowered 1188 // after all. This happens because simd128 turns on custom lowering for 1189 // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an 1190 // illegal type. 1191 break; 1192 default: 1193 llvm_unreachable( 1194 "ReplaceNodeResults not implemented for this op for WebAssembly!"); 1195 } 1196 } 1197 1198 //===----------------------------------------------------------------------===// 1199 // Custom lowering hooks. 1200 //===----------------------------------------------------------------------===// 1201 1202 SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op, 1203 SelectionDAG &DAG) const { 1204 SDLoc DL(Op); 1205 switch (Op.getOpcode()) { 1206 default: 1207 llvm_unreachable("unimplemented operation lowering"); 1208 return SDValue(); 1209 case ISD::FrameIndex: 1210 return LowerFrameIndex(Op, DAG); 1211 case ISD::GlobalAddress: 1212 return LowerGlobalAddress(Op, DAG); 1213 case ISD::GlobalTLSAddress: 1214 return LowerGlobalTLSAddress(Op, DAG); 1215 case ISD::ExternalSymbol: 1216 return LowerExternalSymbol(Op, DAG); 1217 case ISD::JumpTable: 1218 return LowerJumpTable(Op, DAG); 1219 case ISD::BR_JT: 1220 return LowerBR_JT(Op, DAG); 1221 case ISD::VASTART: 1222 return LowerVASTART(Op, DAG); 1223 case ISD::BlockAddress: 1224 case ISD::BRIND: 1225 fail(DL, DAG, "WebAssembly hasn't implemented computed gotos"); 1226 return SDValue(); 1227 case ISD::RETURNADDR: 1228 return LowerRETURNADDR(Op, DAG); 1229 case ISD::FRAMEADDR: 1230 return LowerFRAMEADDR(Op, DAG); 1231 case ISD::CopyToReg: 1232 return LowerCopyToReg(Op, DAG); 1233 case ISD::EXTRACT_VECTOR_ELT: 1234 case ISD::INSERT_VECTOR_ELT: 1235 return LowerAccessVectorElement(Op, DAG); 1236 case ISD::INTRINSIC_VOID: 1237 case ISD::INTRINSIC_WO_CHAIN: 1238 case ISD::INTRINSIC_W_CHAIN: 1239 return LowerIntrinsic(Op, DAG); 1240 case ISD::SIGN_EXTEND_INREG: 1241 return LowerSIGN_EXTEND_INREG(Op, DAG); 1242 case ISD::BUILD_VECTOR: 1243 return LowerBUILD_VECTOR(Op, DAG); 1244 case ISD::VECTOR_SHUFFLE: 1245 return LowerVECTOR_SHUFFLE(Op, DAG); 1246 case ISD::SETCC: 1247 return LowerSETCC(Op, DAG); 1248 case ISD::SHL: 1249 case ISD::SRA: 1250 case ISD::SRL: 1251 return LowerShift(Op, DAG); 1252 case ISD::FP_TO_SINT_SAT: 1253 case ISD::FP_TO_UINT_SAT: 1254 return LowerFP_TO_INT_SAT(Op, DAG); 1255 } 1256 } 1257 1258 SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op, 1259 SelectionDAG &DAG) const { 1260 SDValue Src = Op.getOperand(2); 1261 if (isa<FrameIndexSDNode>(Src.getNode())) { 1262 // CopyToReg nodes don't support FrameIndex operands. Other targets select 1263 // the FI to some LEA-like instruction, but since we don't have that, we 1264 // need to insert some kind of instruction that can take an FI operand and 1265 // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy 1266 // local.copy between Op and its FI operand. 1267 SDValue Chain = Op.getOperand(0); 1268 SDLoc DL(Op); 1269 unsigned Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg(); 1270 EVT VT = Src.getValueType(); 1271 SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32 1272 : WebAssembly::COPY_I64, 1273 DL, VT, Src), 1274 0); 1275 return Op.getNode()->getNumValues() == 1 1276 ? DAG.getCopyToReg(Chain, DL, Reg, Copy) 1277 : DAG.getCopyToReg(Chain, DL, Reg, Copy, 1278 Op.getNumOperands() == 4 ? Op.getOperand(3) 1279 : SDValue()); 1280 } 1281 return SDValue(); 1282 } 1283 1284 SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op, 1285 SelectionDAG &DAG) const { 1286 int FI = cast<FrameIndexSDNode>(Op)->getIndex(); 1287 return DAG.getTargetFrameIndex(FI, Op.getValueType()); 1288 } 1289 1290 SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op, 1291 SelectionDAG &DAG) const { 1292 SDLoc DL(Op); 1293 1294 if (!Subtarget->getTargetTriple().isOSEmscripten()) { 1295 fail(DL, DAG, 1296 "Non-Emscripten WebAssembly hasn't implemented " 1297 "__builtin_return_address"); 1298 return SDValue(); 1299 } 1300 1301 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 1302 return SDValue(); 1303 1304 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 1305 MakeLibCallOptions CallOptions; 1306 return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(), 1307 {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL) 1308 .first; 1309 } 1310 1311 SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op, 1312 SelectionDAG &DAG) const { 1313 // Non-zero depths are not supported by WebAssembly currently. Use the 1314 // legalizer's default expansion, which is to return 0 (what this function is 1315 // documented to do). 1316 if (Op.getConstantOperandVal(0) > 0) 1317 return SDValue(); 1318 1319 DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true); 1320 EVT VT = Op.getValueType(); 1321 Register FP = 1322 Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction()); 1323 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT); 1324 } 1325 1326 SDValue 1327 WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op, 1328 SelectionDAG &DAG) const { 1329 SDLoc DL(Op); 1330 const auto *GA = cast<GlobalAddressSDNode>(Op); 1331 MVT PtrVT = getPointerTy(DAG.getDataLayout()); 1332 1333 MachineFunction &MF = DAG.getMachineFunction(); 1334 if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory()) 1335 report_fatal_error("cannot use thread-local storage without bulk memory", 1336 false); 1337 1338 const GlobalValue *GV = GA->getGlobal(); 1339 1340 // Currently Emscripten does not support dynamic linking with threads. 1341 // Therefore, if we have thread-local storage, only the local-exec model 1342 // is possible. 1343 // TODO: remove this and implement proper TLS models once Emscripten 1344 // supports dynamic linking with threads. 1345 if (GV->getThreadLocalMode() != GlobalValue::LocalExecTLSModel && 1346 !Subtarget->getTargetTriple().isOSEmscripten()) { 1347 report_fatal_error("only -ftls-model=local-exec is supported for now on " 1348 "non-Emscripten OSes: variable " + 1349 GV->getName(), 1350 false); 1351 } 1352 1353 auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64 1354 : WebAssembly::GLOBAL_GET_I32; 1355 const char *BaseName = MF.createExternalSymbolName("__tls_base"); 1356 1357 SDValue BaseAddr( 1358 DAG.getMachineNode(GlobalGet, DL, PtrVT, 1359 DAG.getTargetExternalSymbol(BaseName, PtrVT)), 1360 0); 1361 1362 SDValue TLSOffset = DAG.getTargetGlobalAddress( 1363 GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL); 1364 SDValue SymAddr = DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, TLSOffset); 1365 1366 return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymAddr); 1367 } 1368 1369 SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op, 1370 SelectionDAG &DAG) const { 1371 SDLoc DL(Op); 1372 const auto *GA = cast<GlobalAddressSDNode>(Op); 1373 EVT VT = Op.getValueType(); 1374 assert(GA->getTargetFlags() == 0 && 1375 "Unexpected target flags on generic GlobalAddressSDNode"); 1376 if (GA->getAddressSpace() != 0) 1377 fail(DL, DAG, "WebAssembly only expects the 0 address space"); 1378 1379 unsigned OperandFlags = 0; 1380 if (isPositionIndependent()) { 1381 const GlobalValue *GV = GA->getGlobal(); 1382 if (getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) { 1383 MachineFunction &MF = DAG.getMachineFunction(); 1384 MVT PtrVT = getPointerTy(MF.getDataLayout()); 1385 const char *BaseName; 1386 if (GV->getValueType()->isFunctionTy()) { 1387 BaseName = MF.createExternalSymbolName("__table_base"); 1388 OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL; 1389 } 1390 else { 1391 BaseName = MF.createExternalSymbolName("__memory_base"); 1392 OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL; 1393 } 1394 SDValue BaseAddr = 1395 DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, 1396 DAG.getTargetExternalSymbol(BaseName, PtrVT)); 1397 1398 SDValue SymAddr = DAG.getNode( 1399 WebAssemblyISD::WrapperPIC, DL, VT, 1400 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(), 1401 OperandFlags)); 1402 1403 return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr); 1404 } else { 1405 OperandFlags = WebAssemblyII::MO_GOT; 1406 } 1407 } 1408 1409 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT, 1410 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, 1411 GA->getOffset(), OperandFlags)); 1412 } 1413 1414 SDValue 1415 WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op, 1416 SelectionDAG &DAG) const { 1417 SDLoc DL(Op); 1418 const auto *ES = cast<ExternalSymbolSDNode>(Op); 1419 EVT VT = Op.getValueType(); 1420 assert(ES->getTargetFlags() == 0 && 1421 "Unexpected target flags on generic ExternalSymbolSDNode"); 1422 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT, 1423 DAG.getTargetExternalSymbol(ES->getSymbol(), VT)); 1424 } 1425 1426 SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op, 1427 SelectionDAG &DAG) const { 1428 // There's no need for a Wrapper node because we always incorporate a jump 1429 // table operand into a BR_TABLE instruction, rather than ever 1430 // materializing it in a register. 1431 const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 1432 return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(), 1433 JT->getTargetFlags()); 1434 } 1435 1436 SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op, 1437 SelectionDAG &DAG) const { 1438 SDLoc DL(Op); 1439 SDValue Chain = Op.getOperand(0); 1440 const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1)); 1441 SDValue Index = Op.getOperand(2); 1442 assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags"); 1443 1444 SmallVector<SDValue, 8> Ops; 1445 Ops.push_back(Chain); 1446 Ops.push_back(Index); 1447 1448 MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo(); 1449 const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs; 1450 1451 // Add an operand for each case. 1452 for (auto MBB : MBBs) 1453 Ops.push_back(DAG.getBasicBlock(MBB)); 1454 1455 // Add the first MBB as a dummy default target for now. This will be replaced 1456 // with the proper default target (and the preceding range check eliminated) 1457 // if possible by WebAssemblyFixBrTableDefaults. 1458 Ops.push_back(DAG.getBasicBlock(*MBBs.begin())); 1459 return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops); 1460 } 1461 1462 SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op, 1463 SelectionDAG &DAG) const { 1464 SDLoc DL(Op); 1465 EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout()); 1466 1467 auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>(); 1468 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 1469 1470 SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL, 1471 MFI->getVarargBufferVreg(), PtrVT); 1472 return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1), 1473 MachinePointerInfo(SV)); 1474 } 1475 1476 static SDValue getCppExceptionSymNode(SDValue Op, unsigned TagIndex, 1477 SelectionDAG &DAG) { 1478 // We only support C++ exceptions for now 1479 int Tag = 1480 cast<ConstantSDNode>(Op.getOperand(TagIndex).getNode())->getZExtValue(); 1481 if (Tag != WebAssembly::CPP_EXCEPTION) 1482 llvm_unreachable("Invalid tag: We only support C++ exceptions for now"); 1483 auto &MF = DAG.getMachineFunction(); 1484 const auto &TLI = DAG.getTargetLoweringInfo(); 1485 MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); 1486 const char *SymName = MF.createExternalSymbolName("__cpp_exception"); 1487 return DAG.getNode(WebAssemblyISD::Wrapper, SDLoc(Op), PtrVT, 1488 DAG.getTargetExternalSymbol(SymName, PtrVT)); 1489 } 1490 1491 SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op, 1492 SelectionDAG &DAG) const { 1493 MachineFunction &MF = DAG.getMachineFunction(); 1494 unsigned IntNo; 1495 switch (Op.getOpcode()) { 1496 case ISD::INTRINSIC_VOID: 1497 case ISD::INTRINSIC_W_CHAIN: 1498 IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 1499 break; 1500 case ISD::INTRINSIC_WO_CHAIN: 1501 IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 1502 break; 1503 default: 1504 llvm_unreachable("Invalid intrinsic"); 1505 } 1506 SDLoc DL(Op); 1507 1508 switch (IntNo) { 1509 default: 1510 return SDValue(); // Don't custom lower most intrinsics. 1511 1512 case Intrinsic::wasm_lsda: { 1513 EVT VT = Op.getValueType(); 1514 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 1515 MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); 1516 auto &Context = MF.getMMI().getContext(); 1517 MCSymbol *S = Context.getOrCreateSymbol(Twine("GCC_except_table") + 1518 Twine(MF.getFunctionNumber())); 1519 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT, 1520 DAG.getMCSymbol(S, PtrVT)); 1521 } 1522 1523 case Intrinsic::wasm_throw: { 1524 SDValue SymNode = getCppExceptionSymNode(Op, 2, DAG); 1525 return DAG.getNode(WebAssemblyISD::THROW, DL, 1526 MVT::Other, // outchain type 1527 { 1528 Op.getOperand(0), // inchain 1529 SymNode, // exception symbol 1530 Op.getOperand(3) // thrown value 1531 }); 1532 } 1533 1534 case Intrinsic::wasm_catch: { 1535 SDValue SymNode = getCppExceptionSymNode(Op, 2, DAG); 1536 return DAG.getNode(WebAssemblyISD::CATCH, DL, 1537 { 1538 MVT::i32, // outchain type 1539 MVT::Other // return value 1540 }, 1541 { 1542 Op.getOperand(0), // inchain 1543 SymNode // exception symbol 1544 }); 1545 } 1546 1547 case Intrinsic::wasm_shuffle: { 1548 // Drop in-chain and replace undefs, but otherwise pass through unchanged 1549 SDValue Ops[18]; 1550 size_t OpIdx = 0; 1551 Ops[OpIdx++] = Op.getOperand(1); 1552 Ops[OpIdx++] = Op.getOperand(2); 1553 while (OpIdx < 18) { 1554 const SDValue &MaskIdx = Op.getOperand(OpIdx + 1); 1555 if (MaskIdx.isUndef() || 1556 cast<ConstantSDNode>(MaskIdx.getNode())->getZExtValue() >= 32) { 1557 Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32); 1558 } else { 1559 Ops[OpIdx++] = MaskIdx; 1560 } 1561 } 1562 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops); 1563 } 1564 } 1565 } 1566 1567 SDValue 1568 WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, 1569 SelectionDAG &DAG) const { 1570 SDLoc DL(Op); 1571 // If sign extension operations are disabled, allow sext_inreg only if operand 1572 // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign 1573 // extension operations, but allowing sext_inreg in this context lets us have 1574 // simple patterns to select extract_lane_s instructions. Expanding sext_inreg 1575 // everywhere would be simpler in this file, but would necessitate large and 1576 // brittle patterns to undo the expansion and select extract_lane_s 1577 // instructions. 1578 assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128()); 1579 if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT) 1580 return SDValue(); 1581 1582 const SDValue &Extract = Op.getOperand(0); 1583 MVT VecT = Extract.getOperand(0).getSimpleValueType(); 1584 if (VecT.getVectorElementType().getSizeInBits() > 32) 1585 return SDValue(); 1586 MVT ExtractedLaneT = 1587 cast<VTSDNode>(Op.getOperand(1).getNode())->getVT().getSimpleVT(); 1588 MVT ExtractedVecT = 1589 MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits()); 1590 if (ExtractedVecT == VecT) 1591 return Op; 1592 1593 // Bitcast vector to appropriate type to ensure ISel pattern coverage 1594 const SDNode *Index = Extract.getOperand(1).getNode(); 1595 if (!isa<ConstantSDNode>(Index)) 1596 return SDValue(); 1597 unsigned IndexVal = cast<ConstantSDNode>(Index)->getZExtValue(); 1598 unsigned Scale = 1599 ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements(); 1600 assert(Scale > 1); 1601 SDValue NewIndex = 1602 DAG.getConstant(IndexVal * Scale, DL, Index->getValueType(0)); 1603 SDValue NewExtract = DAG.getNode( 1604 ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(), 1605 DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex); 1606 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract, 1607 Op.getOperand(1)); 1608 } 1609 1610 SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op, 1611 SelectionDAG &DAG) const { 1612 SDLoc DL(Op); 1613 const EVT VecT = Op.getValueType(); 1614 const EVT LaneT = Op.getOperand(0).getValueType(); 1615 const size_t Lanes = Op.getNumOperands(); 1616 bool CanSwizzle = VecT == MVT::v16i8; 1617 1618 // BUILD_VECTORs are lowered to the instruction that initializes the highest 1619 // possible number of lanes at once followed by a sequence of replace_lane 1620 // instructions to individually initialize any remaining lanes. 1621 1622 // TODO: Tune this. For example, lanewise swizzling is very expensive, so 1623 // swizzled lanes should be given greater weight. 1624 1625 // TODO: Investigate looping rather than always extracting/replacing specific 1626 // lanes to fill gaps. 1627 1628 auto IsConstant = [](const SDValue &V) { 1629 return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP; 1630 }; 1631 1632 // Returns the source vector and index vector pair if they exist. Checks for: 1633 // (extract_vector_elt 1634 // $src, 1635 // (sign_extend_inreg (extract_vector_elt $indices, $i)) 1636 // ) 1637 auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) { 1638 auto Bail = std::make_pair(SDValue(), SDValue()); 1639 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT) 1640 return Bail; 1641 const SDValue &SwizzleSrc = Lane->getOperand(0); 1642 const SDValue &IndexExt = Lane->getOperand(1); 1643 if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG) 1644 return Bail; 1645 const SDValue &Index = IndexExt->getOperand(0); 1646 if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT) 1647 return Bail; 1648 const SDValue &SwizzleIndices = Index->getOperand(0); 1649 if (SwizzleSrc.getValueType() != MVT::v16i8 || 1650 SwizzleIndices.getValueType() != MVT::v16i8 || 1651 Index->getOperand(1)->getOpcode() != ISD::Constant || 1652 Index->getConstantOperandVal(1) != I) 1653 return Bail; 1654 return std::make_pair(SwizzleSrc, SwizzleIndices); 1655 }; 1656 1657 // If the lane is extracted from another vector at a constant index, return 1658 // that vector. The source vector must not have more lanes than the dest 1659 // because the shufflevector indices are in terms of the destination lanes and 1660 // would not be able to address the smaller individual source lanes. 1661 auto GetShuffleSrc = [&](const SDValue &Lane) { 1662 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT) 1663 return SDValue(); 1664 if (!isa<ConstantSDNode>(Lane->getOperand(1).getNode())) 1665 return SDValue(); 1666 if (Lane->getOperand(0).getValueType().getVectorNumElements() > 1667 VecT.getVectorNumElements()) 1668 return SDValue(); 1669 return Lane->getOperand(0); 1670 }; 1671 1672 using ValueEntry = std::pair<SDValue, size_t>; 1673 SmallVector<ValueEntry, 16> SplatValueCounts; 1674 1675 using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>; 1676 SmallVector<SwizzleEntry, 16> SwizzleCounts; 1677 1678 using ShuffleEntry = std::pair<SDValue, size_t>; 1679 SmallVector<ShuffleEntry, 16> ShuffleCounts; 1680 1681 auto AddCount = [](auto &Counts, const auto &Val) { 1682 auto CountIt = 1683 llvm::find_if(Counts, [&Val](auto E) { return E.first == Val; }); 1684 if (CountIt == Counts.end()) { 1685 Counts.emplace_back(Val, 1); 1686 } else { 1687 CountIt->second++; 1688 } 1689 }; 1690 1691 auto GetMostCommon = [](auto &Counts) { 1692 auto CommonIt = 1693 std::max_element(Counts.begin(), Counts.end(), 1694 [](auto A, auto B) { return A.second < B.second; }); 1695 assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector"); 1696 return *CommonIt; 1697 }; 1698 1699 size_t NumConstantLanes = 0; 1700 1701 // Count eligible lanes for each type of vector creation op 1702 for (size_t I = 0; I < Lanes; ++I) { 1703 const SDValue &Lane = Op->getOperand(I); 1704 if (Lane.isUndef()) 1705 continue; 1706 1707 AddCount(SplatValueCounts, Lane); 1708 1709 if (IsConstant(Lane)) 1710 NumConstantLanes++; 1711 if (auto ShuffleSrc = GetShuffleSrc(Lane)) 1712 AddCount(ShuffleCounts, ShuffleSrc); 1713 if (CanSwizzle) { 1714 auto SwizzleSrcs = GetSwizzleSrcs(I, Lane); 1715 if (SwizzleSrcs.first) 1716 AddCount(SwizzleCounts, SwizzleSrcs); 1717 } 1718 } 1719 1720 SDValue SplatValue; 1721 size_t NumSplatLanes; 1722 std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts); 1723 1724 SDValue SwizzleSrc; 1725 SDValue SwizzleIndices; 1726 size_t NumSwizzleLanes = 0; 1727 if (SwizzleCounts.size()) 1728 std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices), 1729 NumSwizzleLanes) = GetMostCommon(SwizzleCounts); 1730 1731 // Shuffles can draw from up to two vectors, so find the two most common 1732 // sources. 1733 SDValue ShuffleSrc1, ShuffleSrc2; 1734 size_t NumShuffleLanes = 0; 1735 if (ShuffleCounts.size()) { 1736 std::tie(ShuffleSrc1, NumShuffleLanes) = GetMostCommon(ShuffleCounts); 1737 ShuffleCounts.erase(std::remove_if(ShuffleCounts.begin(), 1738 ShuffleCounts.end(), 1739 [&](const auto &Pair) { 1740 return Pair.first == ShuffleSrc1; 1741 }), 1742 ShuffleCounts.end()); 1743 } 1744 if (ShuffleCounts.size()) { 1745 size_t AdditionalShuffleLanes; 1746 std::tie(ShuffleSrc2, AdditionalShuffleLanes) = 1747 GetMostCommon(ShuffleCounts); 1748 NumShuffleLanes += AdditionalShuffleLanes; 1749 } 1750 1751 // Predicate returning true if the lane is properly initialized by the 1752 // original instruction 1753 std::function<bool(size_t, const SDValue &)> IsLaneConstructed; 1754 SDValue Result; 1755 // Prefer swizzles over shuffles over vector consts over splats 1756 if (NumSwizzleLanes >= NumShuffleLanes && 1757 NumSwizzleLanes >= NumConstantLanes && NumSwizzleLanes >= NumSplatLanes) { 1758 Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc, 1759 SwizzleIndices); 1760 auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices); 1761 IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) { 1762 return Swizzled == GetSwizzleSrcs(I, Lane); 1763 }; 1764 } else if (NumShuffleLanes >= NumConstantLanes && 1765 NumShuffleLanes >= NumSplatLanes) { 1766 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits() / 8; 1767 size_t DestLaneCount = VecT.getVectorNumElements(); 1768 size_t Scale1 = 1; 1769 size_t Scale2 = 1; 1770 SDValue Src1 = ShuffleSrc1; 1771 SDValue Src2 = ShuffleSrc2 ? ShuffleSrc2 : DAG.getUNDEF(VecT); 1772 if (Src1.getValueType() != VecT) { 1773 size_t LaneSize = 1774 Src1.getValueType().getVectorElementType().getFixedSizeInBits() / 8; 1775 assert(LaneSize > DestLaneSize); 1776 Scale1 = LaneSize / DestLaneSize; 1777 Src1 = DAG.getBitcast(VecT, Src1); 1778 } 1779 if (Src2.getValueType() != VecT) { 1780 size_t LaneSize = 1781 Src2.getValueType().getVectorElementType().getFixedSizeInBits() / 8; 1782 assert(LaneSize > DestLaneSize); 1783 Scale2 = LaneSize / DestLaneSize; 1784 Src2 = DAG.getBitcast(VecT, Src2); 1785 } 1786 1787 int Mask[16]; 1788 assert(DestLaneCount <= 16); 1789 for (size_t I = 0; I < DestLaneCount; ++I) { 1790 const SDValue &Lane = Op->getOperand(I); 1791 SDValue Src = GetShuffleSrc(Lane); 1792 if (Src == ShuffleSrc1) { 1793 Mask[I] = Lane->getConstantOperandVal(1) * Scale1; 1794 } else if (Src && Src == ShuffleSrc2) { 1795 Mask[I] = DestLaneCount + Lane->getConstantOperandVal(1) * Scale2; 1796 } else { 1797 Mask[I] = -1; 1798 } 1799 } 1800 ArrayRef<int> MaskRef(Mask, DestLaneCount); 1801 Result = DAG.getVectorShuffle(VecT, DL, Src1, Src2, MaskRef); 1802 IsLaneConstructed = [&](size_t, const SDValue &Lane) { 1803 auto Src = GetShuffleSrc(Lane); 1804 return Src == ShuffleSrc1 || (Src && Src == ShuffleSrc2); 1805 }; 1806 } else if (NumConstantLanes >= NumSplatLanes) { 1807 SmallVector<SDValue, 16> ConstLanes; 1808 for (const SDValue &Lane : Op->op_values()) { 1809 if (IsConstant(Lane)) { 1810 ConstLanes.push_back(Lane); 1811 } else if (LaneT.isFloatingPoint()) { 1812 ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT)); 1813 } else { 1814 ConstLanes.push_back(DAG.getConstant(0, DL, LaneT)); 1815 } 1816 } 1817 Result = DAG.getBuildVector(VecT, DL, ConstLanes); 1818 IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) { 1819 return IsConstant(Lane); 1820 }; 1821 } else { 1822 // Use a splat, but possibly a load_splat 1823 LoadSDNode *SplattedLoad; 1824 if ((SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) && 1825 SplattedLoad->getMemoryVT() == VecT.getVectorElementType()) { 1826 Result = DAG.getMemIntrinsicNode( 1827 WebAssemblyISD::LOAD_SPLAT, DL, DAG.getVTList(VecT), 1828 {SplattedLoad->getChain(), SplattedLoad->getBasePtr(), 1829 SplattedLoad->getOffset()}, 1830 SplattedLoad->getMemoryVT(), SplattedLoad->getMemOperand()); 1831 } else { 1832 Result = DAG.getSplatBuildVector(VecT, DL, SplatValue); 1833 } 1834 IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) { 1835 return Lane == SplatValue; 1836 }; 1837 } 1838 1839 assert(Result); 1840 assert(IsLaneConstructed); 1841 1842 // Add replace_lane instructions for any unhandled values 1843 for (size_t I = 0; I < Lanes; ++I) { 1844 const SDValue &Lane = Op->getOperand(I); 1845 if (!Lane.isUndef() && !IsLaneConstructed(I, Lane)) 1846 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane, 1847 DAG.getConstant(I, DL, MVT::i32)); 1848 } 1849 1850 return Result; 1851 } 1852 1853 SDValue 1854 WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, 1855 SelectionDAG &DAG) const { 1856 SDLoc DL(Op); 1857 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask(); 1858 MVT VecType = Op.getOperand(0).getSimpleValueType(); 1859 assert(VecType.is128BitVector() && "Unexpected shuffle vector type"); 1860 size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8; 1861 1862 // Space for two vector args and sixteen mask indices 1863 SDValue Ops[18]; 1864 size_t OpIdx = 0; 1865 Ops[OpIdx++] = Op.getOperand(0); 1866 Ops[OpIdx++] = Op.getOperand(1); 1867 1868 // Expand mask indices to byte indices and materialize them as operands 1869 for (int M : Mask) { 1870 for (size_t J = 0; J < LaneBytes; ++J) { 1871 // Lower undefs (represented by -1 in mask) to zero 1872 uint64_t ByteIndex = M == -1 ? 0 : (uint64_t)M * LaneBytes + J; 1873 Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32); 1874 } 1875 } 1876 1877 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops); 1878 } 1879 1880 SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op, 1881 SelectionDAG &DAG) const { 1882 SDLoc DL(Op); 1883 // The legalizer does not know how to expand the unsupported comparison modes 1884 // of i64x2 vectors, so we manually unroll them here. 1885 assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64); 1886 SmallVector<SDValue, 2> LHS, RHS; 1887 DAG.ExtractVectorElements(Op->getOperand(0), LHS); 1888 DAG.ExtractVectorElements(Op->getOperand(1), RHS); 1889 const SDValue &CC = Op->getOperand(2); 1890 auto MakeLane = [&](unsigned I) { 1891 return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I], 1892 DAG.getConstant(uint64_t(-1), DL, MVT::i64), 1893 DAG.getConstant(uint64_t(0), DL, MVT::i64), CC); 1894 }; 1895 return DAG.getBuildVector(Op->getValueType(0), DL, 1896 {MakeLane(0), MakeLane(1)}); 1897 } 1898 1899 SDValue 1900 WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op, 1901 SelectionDAG &DAG) const { 1902 // Allow constant lane indices, expand variable lane indices 1903 SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode(); 1904 if (isa<ConstantSDNode>(IdxNode) || IdxNode->isUndef()) 1905 return Op; 1906 else 1907 // Perform default expansion 1908 return SDValue(); 1909 } 1910 1911 static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) { 1912 EVT LaneT = Op.getSimpleValueType().getVectorElementType(); 1913 // 32-bit and 64-bit unrolled shifts will have proper semantics 1914 if (LaneT.bitsGE(MVT::i32)) 1915 return DAG.UnrollVectorOp(Op.getNode()); 1916 // Otherwise mask the shift value to get proper semantics from 32-bit shift 1917 SDLoc DL(Op); 1918 size_t NumLanes = Op.getSimpleValueType().getVectorNumElements(); 1919 SDValue Mask = DAG.getConstant(LaneT.getSizeInBits() - 1, DL, MVT::i32); 1920 unsigned ShiftOpcode = Op.getOpcode(); 1921 SmallVector<SDValue, 16> ShiftedElements; 1922 DAG.ExtractVectorElements(Op.getOperand(0), ShiftedElements, 0, 0, MVT::i32); 1923 SmallVector<SDValue, 16> ShiftElements; 1924 DAG.ExtractVectorElements(Op.getOperand(1), ShiftElements, 0, 0, MVT::i32); 1925 SmallVector<SDValue, 16> UnrolledOps; 1926 for (size_t i = 0; i < NumLanes; ++i) { 1927 SDValue MaskedShiftValue = 1928 DAG.getNode(ISD::AND, DL, MVT::i32, ShiftElements[i], Mask); 1929 SDValue ShiftedValue = ShiftedElements[i]; 1930 if (ShiftOpcode == ISD::SRA) 1931 ShiftedValue = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, 1932 ShiftedValue, DAG.getValueType(LaneT)); 1933 UnrolledOps.push_back( 1934 DAG.getNode(ShiftOpcode, DL, MVT::i32, ShiftedValue, MaskedShiftValue)); 1935 } 1936 return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps); 1937 } 1938 1939 SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op, 1940 SelectionDAG &DAG) const { 1941 SDLoc DL(Op); 1942 1943 // Only manually lower vector shifts 1944 assert(Op.getSimpleValueType().isVector()); 1945 1946 auto ShiftVal = DAG.getSplatValue(Op.getOperand(1)); 1947 if (!ShiftVal) 1948 return unrollVectorShift(Op, DAG); 1949 1950 // Use anyext because none of the high bits can affect the shift 1951 ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32); 1952 1953 unsigned Opcode; 1954 switch (Op.getOpcode()) { 1955 case ISD::SHL: 1956 Opcode = WebAssemblyISD::VEC_SHL; 1957 break; 1958 case ISD::SRA: 1959 Opcode = WebAssemblyISD::VEC_SHR_S; 1960 break; 1961 case ISD::SRL: 1962 Opcode = WebAssemblyISD::VEC_SHR_U; 1963 break; 1964 default: 1965 llvm_unreachable("unexpected opcode"); 1966 } 1967 1968 return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal); 1969 } 1970 1971 SDValue WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op, 1972 SelectionDAG &DAG) const { 1973 SDLoc DL(Op); 1974 EVT ResT = Op.getValueType(); 1975 uint64_t Width = Op.getConstantOperandVal(1); 1976 1977 if ((ResT == MVT::i32 || ResT == MVT::i64) && (Width == 32 || Width == 64)) 1978 return Op; 1979 1980 if (ResT == MVT::v4i32 && Width == 32) 1981 return Op; 1982 1983 return SDValue(); 1984 } 1985 1986 //===----------------------------------------------------------------------===// 1987 // Custom DAG combine hooks 1988 //===----------------------------------------------------------------------===// 1989 static SDValue 1990 performVECTOR_SHUFFLECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { 1991 auto &DAG = DCI.DAG; 1992 auto Shuffle = cast<ShuffleVectorSDNode>(N); 1993 1994 // Hoist vector bitcasts that don't change the number of lanes out of unary 1995 // shuffles, where they are less likely to get in the way of other combines. 1996 // (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) -> 1997 // (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask)))) 1998 SDValue Bitcast = N->getOperand(0); 1999 if (Bitcast.getOpcode() != ISD::BITCAST) 2000 return SDValue(); 2001 if (!N->getOperand(1).isUndef()) 2002 return SDValue(); 2003 SDValue CastOp = Bitcast.getOperand(0); 2004 MVT SrcType = CastOp.getSimpleValueType(); 2005 MVT DstType = Bitcast.getSimpleValueType(); 2006 if (!SrcType.is128BitVector() || 2007 SrcType.getVectorNumElements() != DstType.getVectorNumElements()) 2008 return SDValue(); 2009 SDValue NewShuffle = DAG.getVectorShuffle( 2010 SrcType, SDLoc(N), CastOp, DAG.getUNDEF(SrcType), Shuffle->getMask()); 2011 return DAG.getBitcast(DstType, NewShuffle); 2012 } 2013 2014 static SDValue 2015 performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { 2016 auto &DAG = DCI.DAG; 2017 assert(N->getOpcode() == ISD::SIGN_EXTEND || 2018 N->getOpcode() == ISD::ZERO_EXTEND); 2019 2020 // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if 2021 // possible before the extract_subvector can be expanded. 2022 auto Extract = N->getOperand(0); 2023 if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR) 2024 return SDValue(); 2025 auto Source = Extract.getOperand(0); 2026 auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1)); 2027 if (IndexNode == nullptr) 2028 return SDValue(); 2029 auto Index = IndexNode->getZExtValue(); 2030 2031 // Only v8i8, v4i16, and v2i32 extracts can be widened, and only if the 2032 // extracted subvector is the low or high half of its source. 2033 EVT ResVT = N->getValueType(0); 2034 if (ResVT == MVT::v8i16) { 2035 if (Extract.getValueType() != MVT::v8i8 || 2036 Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8)) 2037 return SDValue(); 2038 } else if (ResVT == MVT::v4i32) { 2039 if (Extract.getValueType() != MVT::v4i16 || 2040 Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4)) 2041 return SDValue(); 2042 } else if (ResVT == MVT::v2i64) { 2043 if (Extract.getValueType() != MVT::v2i32 || 2044 Source.getValueType() != MVT::v4i32 || (Index != 0 && Index != 2)) 2045 return SDValue(); 2046 } else { 2047 return SDValue(); 2048 } 2049 2050 bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND; 2051 bool IsLow = Index == 0; 2052 2053 unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::EXTEND_LOW_S 2054 : WebAssemblyISD::EXTEND_HIGH_S) 2055 : (IsLow ? WebAssemblyISD::EXTEND_LOW_U 2056 : WebAssemblyISD::EXTEND_HIGH_U); 2057 2058 return DAG.getNode(Op, SDLoc(N), ResVT, Source); 2059 } 2060 2061 static SDValue 2062 performVectorConvertLowCombine(SDNode *N, 2063 TargetLowering::DAGCombinerInfo &DCI) { 2064 auto &DAG = DCI.DAG; 2065 assert(N->getOpcode() == ISD::SINT_TO_FP || 2066 N->getOpcode() == ISD::UINT_TO_FP); 2067 2068 // Combine ({s,u}int_to_fp (extract_subvector ... 0)) to an 2069 // f64x2.convert_low_i32x4_{s,u} SDNode. 2070 auto Extract = N->getOperand(0); 2071 if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR) 2072 return SDValue(); 2073 auto Source = Extract.getOperand(0); 2074 if (Source.getValueType() != MVT::v4i32) 2075 return SDValue(); 2076 auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1)); 2077 if (IndexNode == nullptr) 2078 return SDValue(); 2079 auto Index = IndexNode->getZExtValue(); 2080 2081 // The types must be correct. 2082 EVT ResVT = N->getValueType(0); 2083 if (ResVT != MVT::v2f64 || Extract.getValueType() != MVT::v2i32) 2084 return SDValue(); 2085 2086 // The extracted vector must be the low half. 2087 if (Index != 0) 2088 return SDValue(); 2089 2090 unsigned Op = N->getOpcode() == ISD::SINT_TO_FP 2091 ? WebAssemblyISD::CONVERT_LOW_S 2092 : WebAssemblyISD::CONVERT_LOW_U; 2093 2094 return DAG.getNode(Op, SDLoc(N), ResVT, Source); 2095 } 2096 2097 static SDValue 2098 performVectorTruncSatLowCombine(SDNode *N, 2099 TargetLowering::DAGCombinerInfo &DCI) { 2100 auto &DAG = DCI.DAG; 2101 assert(N->getOpcode() == ISD::CONCAT_VECTORS); 2102 2103 // Combine this: 2104 // 2105 // (concat_vectors (v2i32 (fp_to_{s,u}int_sat $x, 32)), (v2i32 (splat 0))) 2106 // 2107 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x). 2108 EVT ResVT = N->getValueType(0); 2109 if (ResVT != MVT::v4i32) 2110 return SDValue(); 2111 2112 auto FPToInt = N->getOperand(0); 2113 auto FPToIntOp = FPToInt.getOpcode(); 2114 if (FPToIntOp != ISD::FP_TO_SINT_SAT && FPToIntOp != ISD::FP_TO_UINT_SAT) 2115 return SDValue(); 2116 if (FPToInt.getConstantOperandVal(1) != 32) 2117 return SDValue(); 2118 2119 auto Source = FPToInt.getOperand(0); 2120 if (Source.getValueType() != MVT::v2f64) 2121 return SDValue(); 2122 2123 auto *Splat = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); 2124 APInt SplatValue, SplatUndef; 2125 unsigned SplatBitSize; 2126 bool HasAnyUndefs; 2127 if (!Splat || !Splat->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, 2128 HasAnyUndefs)) 2129 return SDValue(); 2130 if (SplatValue != 0) 2131 return SDValue(); 2132 2133 unsigned Op = FPToIntOp == ISD::FP_TO_SINT_SAT 2134 ? WebAssemblyISD::TRUNC_SAT_ZERO_S 2135 : WebAssemblyISD::TRUNC_SAT_ZERO_U; 2136 2137 return DAG.getNode(Op, SDLoc(N), ResVT, Source); 2138 } 2139 2140 SDValue 2141 WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N, 2142 DAGCombinerInfo &DCI) const { 2143 switch (N->getOpcode()) { 2144 default: 2145 return SDValue(); 2146 case ISD::VECTOR_SHUFFLE: 2147 return performVECTOR_SHUFFLECombine(N, DCI); 2148 case ISD::SIGN_EXTEND: 2149 case ISD::ZERO_EXTEND: 2150 return performVectorExtendCombine(N, DCI); 2151 case ISD::SINT_TO_FP: 2152 case ISD::UINT_TO_FP: 2153 return performVectorConvertLowCombine(N, DCI); 2154 case ISD::CONCAT_VECTORS: 2155 return performVectorTruncSatLowCombine(N, DCI); 2156 } 2157 } 2158