//=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the WebAssemblyTargetLowering class.
///
//===----------------------------------------------------------------------===//

#include "WebAssemblyISelLowering.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyTargetMachine.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/WasmEHFuncInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

#define DEBUG_TYPE "wasm-lower"

WebAssemblyTargetLowering::WebAssemblyTargetLowering(
    const TargetMachine &TM, const WebAssemblySubtarget &STI)
    : TargetLowering(TM), Subtarget(&STI) {
  auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;

  // Booleans always contain 0 or 1.
  setBooleanContents(ZeroOrOneBooleanContent);
  // Except in SIMD vectors
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  // We don't know the microarchitecture here, so just reduce register pressure.
  setSchedulingPreference(Sched::RegPressure);
  // Tell ISel that we have a stack pointer.
  setStackPointerRegisterToSaveRestore(
      Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
  // Set up the register classes.
  addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
  addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
  addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
  addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
  if (Subtarget->hasSIMD128()) {
    addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
  }
  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget->getRegisterInfo());

  setOperationAction(ISD::GlobalAddress, MVTPtr, Custom);
  setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom);
  setOperationAction(ISD::JumpTable, MVTPtr, Custom);
  setOperationAction(ISD::BlockAddress, MVTPtr, Custom);
  setOperationAction(ISD::BRIND, MVT::Other, Custom);

  // Take the default expansion for va_arg, va_copy, and va_end. There is no
  // default action for va_start, so we do that custom.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
    // Don't expand the floating-point types to constant pools.
    setOperationAction(ISD::ConstantFP, T, Legal);
    // Expand floating-point comparisons.
    for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
                    ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
      setCondCodeAction(CC, T, Expand);
    // Expand floating-point library function operators.
    for (auto Op :
         {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FMA})
      setOperationAction(Op, T, Expand);
    // Note supported floating-point library function operators that otherwise
    // default to expand.
    for (auto Op :
         {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT})
      setOperationAction(Op, T, Legal);
    // Support minimum and maximum, which otherwise default to expand.
    setOperationAction(ISD::FMINIMUM, T, Legal);
    setOperationAction(ISD::FMAXIMUM, T, Legal);
    // WebAssembly currently has no builtin f16 support.
    setOperationAction(ISD::FP16_TO_FP, T, Expand);
    setOperationAction(ISD::FP_TO_FP16, T, Expand);
    setLoadExtAction(ISD::EXTLOAD, T, MVT::f16, Expand);
    setTruncStoreAction(T, MVT::f16, Expand);
  }

  // Expand unavailable integer operations.
  for (auto Op :
       {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU,
        ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS, ISD::SRA_PARTS,
        ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}) {
    for (auto T : {MVT::i32, MVT::i64})
      setOperationAction(Op, T, Expand);
    if (Subtarget->hasSIMD128())
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
        setOperationAction(Op, T, Expand);
  }

  // SIMD-specific configuration
  if (Subtarget->hasSIMD128()) {
    // Hoist bitcasts out of shuffles
    setTargetDAGCombine(ISD::VECTOR_SHUFFLE);

    // Combine extends of extract_subvectors into widening ops
    setTargetDAGCombine(ISD::SIGN_EXTEND);
    setTargetDAGCombine(ISD::ZERO_EXTEND);

    // Support saturating add for i8x16 and i16x8
    for (auto Op : {ISD::SADDSAT, ISD::UADDSAT})
      for (auto T : {MVT::v16i8, MVT::v8i16})
        setOperationAction(Op, T, Legal);

    // Support integer abs
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
      setOperationAction(ISD::ABS, T, Legal);

    // Custom lower BUILD_VECTORs to minimize number of replace_lanes
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
                   MVT::v2f64})
      setOperationAction(ISD::BUILD_VECTOR, T, Custom);

    // We have custom shuffle lowering to expose the shuffle mask
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
                   MVT::v2f64})
      setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);

    // Custom lowering since wasm shifts must have a scalar shift amount
    for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
        setOperationAction(Op, T, Custom);

    // Custom lower lane accesses to expand out variable indices
    for (auto Op : {ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT})
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32,
                     MVT::v2i64, MVT::v2f64})
        setOperationAction(Op, T, Custom);

    // There is no i8x16.mul instruction
    setOperationAction(ISD::MUL, MVT::v16i8, Expand);

    // There is no vector conditional select instruction
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
                   MVT::v2f64})
      setOperationAction(ISD::SELECT_CC, T, Expand);

    // Expand integer operations supported for scalars but not SIMD
    for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP, ISD::SDIV, ISD::UDIV,
                    ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR})
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
        setOperationAction(Op, T, Expand);

    // But we do have integer min and max operations
    for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
        setOperationAction(Op, T, Legal);

    // Expand float operations supported for scalars but not SIMD
    for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT,
                    ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
                    ISD::FEXP, ISD::FEXP2, ISD::FRINT})
      for (auto T : {MVT::v4f32, MVT::v2f64})
        setOperationAction(Op, T, Expand);

    // Custom lower condition codes not directly supported for i64x2 vectors
    for (unsigned CC = 0; CC < ISD::SETCC_INVALID; ++CC)
      setCondCodeAction(static_cast<ISD::CondCode>(CC), MVT::v2i64, Custom);

    // 64x2 conversions are not in the spec
    for (auto Op :
         {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT})
      for (auto T : {MVT::v2i64, MVT::v2f64})
        setOperationAction(Op, T, Expand);
  }

  // As a special case, these operators use the type to mean the type to
  // sign-extend from.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  if (!Subtarget->hasSignExt()) {
    // Sign extends are legal only when extending a vector extract
    auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
    for (auto T : {MVT::i8, MVT::i16, MVT::i32})
      setOperationAction(ISD::SIGN_EXTEND_INREG, T, Action);
  }
  for (auto T : MVT::integer_fixedlen_vector_valuetypes())
    setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand);

  // Dynamic stack allocation: use the default expansion.
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand);

  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
  setOperationAction(ISD::FrameIndex, MVT::i64, Custom);
  setOperationAction(ISD::CopyToReg, MVT::Other, Custom);

  // Expand these forms; we pattern-match the forms that we can handle in isel.
  for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
    for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
      setOperationAction(Op, T, Expand);

  // We have custom switch handling.
  setOperationAction(ISD::BR_JT, MVT::Other, Custom);

  // WebAssembly doesn't have:
  //  - Floating-point extending loads.
  //  - Floating-point truncating stores.
  //  - i1 extending loads.
  //  - truncating SIMD stores and most extending loads
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  for (auto T : MVT::integer_valuetypes())
    for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
      setLoadExtAction(Ext, T, MVT::i1, Promote);
  if (Subtarget->hasSIMD128()) {
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
                   MVT::v2f64}) {
      for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
        if (MVT(T) != MemT) {
          setTruncStoreAction(T, MemT, Expand);
          for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
            setLoadExtAction(Ext, T, MemT, Expand);
        }
      }
    }
    // But some vector extending loads are legal
    for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
      setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
      setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
      setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
    }
    // And some truncating stores are legal as well
    setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
    setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
  }

  // Don't do anything clever with build_pairs
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // Trap lowers to wasm unreachable
  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);

  // Exception handling intrinsics
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

  setMaxAtomicSizeInBitsSupported(64);

  // Override the __gnu_f2h_ieee/__gnu_h2f_ieee names so that the f32 name is
  // consistent with the f64 and f128 names.
  setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
  setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");

  // Define the emscripten name for return address helper.
  // TODO: when implementing other Wasm backends, make this generic or only do
  // this on emscripten depending on what they end up doing.
  setLibcallName(RTLIB::RETURN_ADDRESS, "emscripten_return_address");

  // Always convert switches to br_tables unless there is only one case, which
  // is equivalent to a simple branch. This reduces code size for wasm, and we
  // defer possible jump table optimizations to the VM.
  setMinimumJumpTableEntries(2);
}

TargetLowering::AtomicExpansionKind
WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // We have wasm instructions for these
  switch (AI->getOperation()) {
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::And:
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
  case AtomicRMWInst::Xchg:
    return AtomicExpansionKind::None;
  default:
    break;
  }
  return AtomicExpansionKind::CmpXChg;
}

FastISel *WebAssemblyTargetLowering::createFastISel(
    FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
  return WebAssembly::createFastISel(FuncInfo, LibInfo);
}

MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
                                                      EVT VT) const {
  unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
  if (BitWidth > 1 && BitWidth < 8)
    BitWidth = 8;

  if (BitWidth > 64) {
    // The shift will be lowered to a libcall, and compiler-rt libcalls expect
    // the count to be an i32.
    BitWidth = 32;
    assert(BitWidth >= Log2_32_Ceil(VT.getSizeInBits()) &&
           "32-bit shift counts ought to be enough for anyone");
  }

  MVT Result = MVT::getIntegerVT(BitWidth);
  assert(Result != MVT::INVALID_SIMPLE_VALUE_TYPE &&
         "Unable to represent scalar shift amount type");
  return Result;
}

// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
// undefined result on invalid/overflow, to the WebAssembly opcode, which
// traps on invalid/overflow.
static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
                                       MachineBasicBlock *BB,
                                       const TargetInstrInfo &TII,
                                       bool IsUnsigned, bool Int64,
                                       bool Float64, unsigned LoweredOpcode) {
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();

  Register OutReg = MI.getOperand(0).getReg();
  Register InReg = MI.getOperand(1).getReg();

  unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
  unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
  unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
  unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
  unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
  unsigned Eqz = WebAssembly::EQZ_I32;
  unsigned And = WebAssembly::AND_I32;
  int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
  int64_t Substitute = IsUnsigned ? 0 : Limit;
  double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
  auto &Context = BB->getParent()->getFunction().getContext();
  Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);

  const BasicBlock *LLVMBB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
  MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB);
  MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);

  MachineFunction::iterator It = ++BB->getIterator();
  F->insert(It, FalseMBB);
  F->insert(It, TrueMBB);
  F->insert(It, DoneMBB);

  // Transfer the remainder of BB and its successor edges to DoneMBB.
  DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);

  BB->addSuccessor(TrueMBB);
  BB->addSuccessor(FalseMBB);
  TrueMBB->addSuccessor(DoneMBB);
  FalseMBB->addSuccessor(DoneMBB);

  unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
  Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
  Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
  CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
  TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));

  MI.eraseFromParent();
  // For signed numbers, we can do a single comparison to determine whether
  // fabs(x) is within range.
  if (IsUnsigned) {
    Tmp0 = InReg;
  } else {
    BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
  }
  BuildMI(BB, DL, TII.get(FConst), Tmp1)
      .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
  BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);

  // For unsigned numbers, we have to do a separate comparison with zero.
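  // Skipping the fabs above means a negative input would pass the upper-bound
  // check (it is certainly less than CmpVal), yet it is still out of range for
  // an unsigned conversion, so additionally require the input to be >= 0.0.
  // NaN fails both comparisons and therefore also takes the substitute path.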
  if (IsUnsigned) {
    Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
    Register SecondCmpReg =
        MRI.createVirtualRegister(&WebAssembly::I32RegClass);
    Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
    BuildMI(BB, DL, TII.get(FConst), Tmp1)
        .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
    BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
    BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
    CmpReg = AndReg;
  }

  BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);

  // Create the CFG diamond to select between doing the conversion or using
  // the substitute value.
  BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
  BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
  BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
  BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
  BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
      .addReg(FalseReg)
      .addMBB(FalseMBB)
      .addReg(TrueReg)
      .addMBB(TrueMBB);

  return DoneMBB;
}

static MachineBasicBlock *LowerCallResults(MachineInstr &CallResults,
                                           DebugLoc DL, MachineBasicBlock *BB,
                                           const TargetInstrInfo &TII) {
  MachineInstr &CallParams = *CallResults.getPrevNode();
  assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS);
  assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS ||
         CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS);

  bool IsIndirect = CallParams.getOperand(0).isReg();
  bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS;

  unsigned CallOp;
  if (IsIndirect && IsRetCall) {
    CallOp = WebAssembly::RET_CALL_INDIRECT;
  } else if (IsIndirect) {
    CallOp = WebAssembly::CALL_INDIRECT;
  } else if (IsRetCall) {
    CallOp = WebAssembly::RET_CALL;
  } else {
    CallOp = WebAssembly::CALL;
  }

  MachineFunction &MF = *BB->getParent();
  const MCInstrDesc &MCID = TII.get(CallOp);
  MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));

  // See if we must truncate the function pointer.
  // CALL_INDIRECT takes an i32, but in wasm64 we represent function pointers
  // as 64-bit for uniformity with other pointer types.
  if (IsIndirect && MF.getSubtarget<WebAssemblySubtarget>().hasAddr64()) {
    Register Reg32 =
        MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
    auto &FnPtr = CallParams.getOperand(0);
    BuildMI(*BB, CallResults.getIterator(), DL,
            TII.get(WebAssembly::I32_WRAP_I64), Reg32)
        .addReg(FnPtr.getReg());
    FnPtr.setReg(Reg32);
  }

  // Move the function pointer to the end of the arguments for indirect calls
  if (IsIndirect) {
    auto FnPtr = CallParams.getOperand(0);
    CallParams.RemoveOperand(0);
    CallParams.addOperand(FnPtr);
  }

  for (auto Def : CallResults.defs())
    MIB.add(Def);

  // Add placeholders for the type index and immediate flags
  if (IsIndirect) {
    MIB.addImm(0);
    MIB.addImm(0);
  }

  for (auto Use : CallParams.uses())
    MIB.add(Use);

  BB->insert(CallResults.getIterator(), MIB);
  CallParams.eraseFromParent();
  CallResults.eraseFromParent();

  return BB;
}

MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
    MachineInstr &MI, MachineBasicBlock *BB) const {
  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();

  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected instr type to insert");
  case WebAssembly::FP_TO_SINT_I32_F32:
    return LowerFPToInt(MI, DL, BB, TII, false, false, false,
                        WebAssembly::I32_TRUNC_S_F32);
  case WebAssembly::FP_TO_UINT_I32_F32:
    return LowerFPToInt(MI, DL, BB, TII, true, false, false,
                        WebAssembly::I32_TRUNC_U_F32);
  case WebAssembly::FP_TO_SINT_I64_F32:
    return LowerFPToInt(MI, DL, BB, TII, false, true, false,
                        WebAssembly::I64_TRUNC_S_F32);
  case WebAssembly::FP_TO_UINT_I64_F32:
    return LowerFPToInt(MI, DL, BB, TII, true, true, false,
                        WebAssembly::I64_TRUNC_U_F32);
  case WebAssembly::FP_TO_SINT_I32_F64:
    return LowerFPToInt(MI, DL, BB, TII, false, false, true,
                        WebAssembly::I32_TRUNC_S_F64);
  case WebAssembly::FP_TO_UINT_I32_F64:
    return LowerFPToInt(MI, DL, BB, TII, true, false, true,
                        WebAssembly::I32_TRUNC_U_F64);
  case WebAssembly::FP_TO_SINT_I64_F64:
    return LowerFPToInt(MI, DL, BB, TII, false, true, true,
                        WebAssembly::I64_TRUNC_S_F64);
  case WebAssembly::FP_TO_UINT_I64_F64:
    return LowerFPToInt(MI, DL, BB, TII, true, true, true,
                        WebAssembly::I64_TRUNC_U_F64);
  case WebAssembly::CALL_RESULTS:
  case WebAssembly::RET_CALL_RESULTS:
    return LowerCallResults(MI, DL, BB, TII);
  }
}

const char *
WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) {
  case WebAssemblyISD::FIRST_NUMBER:
  case WebAssemblyISD::FIRST_MEM_OPCODE:
    break;
#define HANDLE_NODETYPE(NODE)                                                  \
  case WebAssemblyISD::NODE:                                                   \
    return "WebAssemblyISD::" #NODE;
#define HANDLE_MEM_NODETYPE(NODE) HANDLE_NODETYPE(NODE)
#include "WebAssemblyISD.def"
#undef HANDLE_MEM_NODETYPE
#undef HANDLE_NODETYPE
  }
  return nullptr;
}

std::pair<unsigned, const TargetRegisterClass *>
WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // WebAssembly register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      assert(VT != MVT::iPTR && "Pointer MVT not expected here");
      if (Subtarget->hasSIMD128() && VT.isVector()) {
        if (VT.getSizeInBits() == 128)
          return std::make_pair(0U, &WebAssembly::V128RegClass);
      }
      if (VT.isInteger() && !VT.isVector()) {
        if (VT.getSizeInBits() <= 32)
          return std::make_pair(0U, &WebAssembly::I32RegClass);
        if (VT.getSizeInBits() <= 64)
          return std::make_pair(0U, &WebAssembly::I64RegClass);
      }
      break;
    default:
      break;
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

bool WebAssemblyTargetLowering::isCheapToSpeculateCttz() const {
  // Assume ctz is a relatively cheap operation.
  return true;
}

bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const {
  // Assume clz is a relatively cheap operation.
  return true;
}

bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                      const AddrMode &AM,
                                                      Type *Ty, unsigned AS,
                                                      Instruction *I) const {
  // WebAssembly offsets are added as unsigned without wrapping. The
  // isLegalAddressingMode gives us no way to determine if wrapping could be
  // happening, so we approximate this by accepting only non-negative offsets.
  if (AM.BaseOffs < 0)
    return false;

  // WebAssembly has no scale register operands.
  if (AM.Scale != 0)
    return false;

  // Everything else is legal.
  return true;
}

bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
    EVT /*VT*/, unsigned /*AddrSpace*/, unsigned /*Align*/,
    MachineMemOperand::Flags /*Flags*/, bool *Fast) const {
  // WebAssembly supports unaligned accesses, though it should be declared
  // with the p2align attribute on loads and stores which do so, and there
  // may be a performance impact. We tell LLVM they're "fast" because
  // for the kinds of things that LLVM uses this for (merging adjacent stores
  // of constants, etc.), WebAssembly implementations will either want the
  // unaligned access or they'll split anyway.
  if (Fast)
    *Fast = true;
  return true;
}

bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
                                              AttributeList Attr) const {
  // The current thinking is that wasm engines will perform this optimization,
  // so we can save on code size.
  return true;
}

bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
  EVT ExtT = ExtVal.getValueType();
  EVT MemT = cast<LoadSDNode>(ExtVal->getOperand(0))->getValueType(0);
  return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
         (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
         (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
}

EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
                                                  LLVMContext &C,
                                                  EVT VT) const {
  if (VT.isVector())
    return VT.changeVectorElementTypeToInteger();

  // So far, all branch instructions in Wasm take an I32 condition.
  // The default TargetLowering::getSetCCResultType returns the pointer size,
  // which would be useful to reduce instruction counts when testing
  // against 64-bit pointers/values if at some point Wasm supports that.
  return EVT::getIntegerVT(C, 32);
}

bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                                   const CallInst &I,
                                                   MachineFunction &MF,
                                                   unsigned Intrinsic) const {
  switch (Intrinsic) {
  case Intrinsic::wasm_atomic_notify:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    // atomic.notify instruction does not really load the memory specified with
    // this argument, but MachineMemOperand should either be load or store, so
    // we set this to a load.
    // FIXME Volatile isn't really correct, but currently all LLVM atomic
    // instructions are treated as volatiles in the backend, so we should be
    // consistent. The same applies for wasm_atomic_wait intrinsics too.
    Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
    return true;
  case Intrinsic::wasm_atomic_wait_i32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
    return true;
  case Intrinsic::wasm_atomic_wait_i64:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i64;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(8);
    Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
    return true;
  case Intrinsic::wasm_load32_zero:
  case Intrinsic::wasm_load64_zero:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = Intrinsic == Intrinsic::wasm_load32_zero ? MVT::i32 : MVT::i64;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Info.memVT == MVT::i32 ? Align(4) : Align(8);
    Info.flags = MachineMemOperand::MOLoad;
    return true;
  default:
    return false;
  }
}

//===----------------------------------------------------------------------===//
// WebAssembly Lowering private implementation.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//

static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
  MachineFunction &MF = DAG.getMachineFunction();
  DAG.getContext()->diagnose(
      DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
}

// Test whether the given calling convention is supported.
static bool callingConvSupported(CallingConv::ID CallConv) {
  // We currently support the language-independent target-independent
  // conventions. We don't yet have a way to annotate calls with properties like
  // "cold", and we don't have any call-clobbered registers, so these are mostly
  // all handled the same.
  return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
         CallConv == CallingConv::Cold ||
         CallConv == CallingConv::PreserveMost ||
         CallConv == CallingConv::PreserveAll ||
         CallConv == CallingConv::CXX_FAST_TLS ||
         CallConv == CallingConv::WASM_EmscriptenInvoke ||
         CallConv == CallingConv::Swift;
}

SDValue
WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                     SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc DL = CLI.DL;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  MachineFunction &MF = DAG.getMachineFunction();
  auto Layout = MF.getDataLayout();

  CallingConv::ID CallConv = CLI.CallConv;
  if (!callingConvSupported(CallConv))
    fail(DL, DAG,
         "WebAssembly doesn't support language-specific or target-specific "
         "calling conventions yet");
  if (CLI.IsPatchPoint)
    fail(DL, DAG, "WebAssembly doesn't support patch point yet");

  if (CLI.IsTailCall) {
    auto NoTail = [&](const char *Msg) {
      if (CLI.CB && CLI.CB->isMustTailCall())
        fail(DL, DAG, Msg);
      CLI.IsTailCall = false;
    };

    if (!Subtarget->hasTailCall())
      NoTail("WebAssembly 'tail-call' feature not enabled");

    // Varargs calls cannot be tail calls because the buffer is on the stack
    if (CLI.IsVarArg)
      NoTail("WebAssembly does not support varargs tail calls");

    // Do not tail call unless caller and callee return types match
    const Function &F = MF.getFunction();
    const TargetMachine &TM = getTargetMachine();
    Type *RetTy = F.getReturnType();
    SmallVector<MVT, 4> CallerRetTys;
    SmallVector<MVT, 4> CalleeRetTys;
    computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
    computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
    bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
                      std::equal(CallerRetTys.begin(), CallerRetTys.end(),
                                 CalleeRetTys.begin());
    if (!TypesMatch)
      NoTail("WebAssembly tail call requires caller and callee return types to "
             "match");

    // If pointers to local stack values are passed, we cannot tail call
    if (CLI.CB) {
      for (auto &Arg : CLI.CB->args()) {
        Value *Val = Arg.get();
        // Trace the value back through pointer operations
        while (true) {
          Value *Src = Val->stripPointerCastsAndAliases();
          if (auto *GEP = dyn_cast<GetElementPtrInst>(Src))
            Src = GEP->getPointerOperand();
          if (Val == Src)
            break;
          Val = Src;
        }
        if (isa<AllocaInst>(Val)) {
          NoTail(
              "WebAssembly does not support tail calling with stack arguments");
          break;
        }
      }
    }
  }

  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;

  // The generic code may have added an sret argument. If we're lowering an
  // invoke function, the ABI requires that the function pointer be the first
  // argument, so we may have to swap the arguments.
  if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
      Outs[0].Flags.isSRet()) {
    std::swap(Outs[0], Outs[1]);
    std::swap(OutVals[0], OutVals[1]);
  }

  bool HasSwiftSelfArg = false;
  bool HasSwiftErrorArg = false;
  unsigned NumFixedArgs = 0;
  for (unsigned I = 0; I < Outs.size(); ++I) {
    const ISD::OutputArg &Out = Outs[I];
    SDValue &OutVal = OutVals[I];
    HasSwiftSelfArg |= Out.Flags.isSwiftSelf();
    HasSwiftErrorArg |= Out.Flags.isSwiftError();
    if (Out.Flags.isNest())
      fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
    if (Out.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
    if (Out.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
    if (Out.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
    if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
      auto &MFI = MF.getFrameInfo();
      int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
                                     Out.Flags.getNonZeroByValAlign(),
                                     /*isSS=*/false);
      SDValue SizeNode =
          DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
      SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
      Chain = DAG.getMemcpy(
          Chain, DL, FINode, OutVal, SizeNode, Out.Flags.getNonZeroByValAlign(),
          /*isVolatile*/ false, /*AlwaysInline=*/false,
          /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
      OutVal = FINode;
    }
    // Count the number of fixed args *after* legalization.
    NumFixedArgs += Out.IsFixed;
  }

  bool IsVarArg = CLI.IsVarArg;
  auto PtrVT = getPointerTy(Layout);

  // For swiftcc, emit additional swiftself and swifterror arguments if there
  // aren't any. These additional arguments are also added to the callee
  // signature; they are necessary to match the callee and caller signatures
  // for indirect calls.
  if (CallConv == CallingConv::Swift) {
    if (!HasSwiftSelfArg) {
      NumFixedArgs++;
      ISD::OutputArg Arg;
      Arg.Flags.setSwiftSelf();
      CLI.Outs.push_back(Arg);
      SDValue ArgVal = DAG.getUNDEF(PtrVT);
      CLI.OutVals.push_back(ArgVal);
    }
    if (!HasSwiftErrorArg) {
      NumFixedArgs++;
      ISD::OutputArg Arg;
      Arg.Flags.setSwiftError();
      CLI.Outs.push_back(Arg);
      SDValue ArgVal = DAG.getUNDEF(PtrVT);
      CLI.OutVals.push_back(ArgVal);
    }
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (IsVarArg) {
    // Outgoing non-fixed arguments are placed in a buffer. First
    // compute their offsets and the total amount of buffer space needed.
    for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
      const ISD::OutputArg &Out = Outs[I];
      SDValue &Arg = OutVals[I];
      EVT VT = Arg.getValueType();
      assert(VT != MVT::iPTR && "Legalized args should be concrete");
      Type *Ty = VT.getTypeForEVT(*DAG.getContext());
      Align Alignment =
          std::max(Out.Flags.getNonZeroOrigAlign(), Layout.getABITypeAlign(Ty));
      unsigned Offset =
          CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), Alignment);
      CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
                                        Offset, VT.getSimpleVT(),
                                        CCValAssign::Full));
    }
  }

  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();

  SDValue FINode;
  if (IsVarArg && NumBytes) {
    // For non-fixed arguments, next emit stores to store the argument values
    // to the stack buffer at the offsets computed above.
    int FI = MF.getFrameInfo().CreateStackObject(NumBytes,
                                                 Layout.getStackAlignment(),
                                                 /*isSS=*/false);
    unsigned ValNo = 0;
    SmallVector<SDValue, 8> Chains;
    for (SDValue Arg :
         make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) {
      assert(ArgLocs[ValNo].getValNo() == ValNo &&
             "ArgLocs should remain in order and only hold varargs args");
      unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
      FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
      SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
                                DAG.getConstant(Offset, DL, PtrVT));
      Chains.push_back(
          DAG.getStore(Chain, DL, Arg, Add,
                       MachinePointerInfo::getFixedStack(MF, FI, Offset)));
    }
    if (!Chains.empty())
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  } else if (IsVarArg) {
    FINode = DAG.getIntPtrConstant(0, DL);
  }

  if (Callee->getOpcode() == ISD::GlobalAddress) {
    // If the callee is a GlobalAddress node (quite common, every direct call
    // is), turn it into a TargetGlobalAddress node so that LowerGlobalAddress
    // doesn't add MO_GOT, which is not needed for direct calls.
    GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Callee);
    Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
                                        getPointerTy(DAG.getDataLayout()),
                                        GA->getOffset());
    Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
                         getPointerTy(DAG.getDataLayout()), Callee);
  }

  // Compute the operands for the CALLn node.
  SmallVector<SDValue, 16> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
  // isn't reliable.
  Ops.append(OutVals.begin(),
             IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
  // Add a pointer to the vararg buffer.
  if (IsVarArg)
    Ops.push_back(FINode);

  SmallVector<EVT, 8> InTys;
  for (const auto &In : Ins) {
    assert(!In.Flags.isByVal() && "byval is not valid for return values");
    assert(!In.Flags.isNest() && "nest is not valid for return values");
    if (In.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
    if (In.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
    if (In.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG,
           "WebAssembly hasn't implemented cons regs last return values");
    // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
    // registers.
    InTys.push_back(In.VT);
  }

  if (CLI.IsTailCall) {
    // ret_calls do not return values to the current frame
    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
    return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
  }

  InTys.push_back(MVT::Other);
  SDVTList InTyList = DAG.getVTList(InTys);
  SDValue Res = DAG.getNode(WebAssemblyISD::CALL, DL, InTyList, Ops);

  for (size_t I = 0; I < Ins.size(); ++I)
    InVals.push_back(Res.getValue(I));

  // Return the chain
  return Res.getValue(Ins.size());
}

bool WebAssemblyTargetLowering::CanLowerReturn(
    CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    LLVMContext & /*Context*/) const {
  // WebAssembly can only handle returning tuples with multivalue enabled
  return Subtarget->hasMultivalue() || Outs.size() <= 1;
}

SDValue WebAssemblyTargetLowering::LowerReturn(
    SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
    SelectionDAG &DAG) const {
  assert((Subtarget->hasMultivalue() || Outs.size() <= 1) &&
         "MVP WebAssembly can only return up to one value");
  if (!callingConvSupported(CallConv))
    fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");

  SmallVector<SDValue, 4> RetOps(1, Chain);
  RetOps.append(OutVals.begin(), OutVals.end());
  Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);

  // Record the number and types of the return values.
  for (const ISD::OutputArg &Out : Outs) {
    assert(!Out.Flags.isByVal() && "byval is not valid for return values");
    assert(!Out.Flags.isNest() && "nest is not valid for return values");
    assert(Out.IsFixed && "non-fixed return value is not valid");
    if (Out.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
    if (Out.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
    if (Out.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
  }

  return Chain;
}

SDValue WebAssemblyTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  if (!callingConvSupported(CallConv))
    fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");

  MachineFunction &MF = DAG.getMachineFunction();
  auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();

  // Set up the incoming ARGUMENTS value, which serves to represent the liveness
  // of the incoming values before they're represented by virtual registers.
  MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);

  bool HasSwiftErrorArg = false;
  bool HasSwiftSelfArg = false;
  for (const ISD::InputArg &In : Ins) {
    HasSwiftSelfArg |= In.Flags.isSwiftSelf();
    HasSwiftErrorArg |= In.Flags.isSwiftError();
    if (In.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
    if (In.Flags.isNest())
      fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
    if (In.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
    if (In.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
    // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
    // registers.
    InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
                                           DAG.getTargetConstant(InVals.size(),
                                                                 DL, MVT::i32))
                             : DAG.getUNDEF(In.VT));

    // Record the number and types of arguments.
    MFI->addParam(In.VT);
  }

  // For swiftcc, emit additional swiftself and swifterror arguments if there
  // aren't any. These additional arguments are also added to the callee
  // signature; they are necessary to match the callee and caller signatures
  // for indirect calls.
  auto PtrVT = getPointerTy(MF.getDataLayout());
  if (CallConv == CallingConv::Swift) {
    if (!HasSwiftSelfArg) {
      MFI->addParam(PtrVT);
    }
    if (!HasSwiftErrorArg) {
      MFI->addParam(PtrVT);
    }
  }
  // Varargs are copied into a buffer allocated by the caller, and a pointer to
  // the buffer is passed as an argument.
  if (IsVarArg) {
    MVT PtrVT = getPointerTy(MF.getDataLayout());
    Register VarargVreg =
        MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrVT));
    MFI->setVarargBufferVreg(VarargVreg);
    Chain = DAG.getCopyToReg(
        Chain, DL, VarargVreg,
        DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
                    DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
    MFI->addParam(PtrVT);
  }

  // Record the number and types of arguments and results.
  SmallVector<MVT, 4> Params;
  SmallVector<MVT, 4> Results;
  computeSignatureVTs(MF.getFunction().getFunctionType(), &MF.getFunction(),
                      MF.getFunction(), DAG.getTarget(), Params, Results);
  for (MVT VT : Results)
    MFI->addResult(VT);
  // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
  // the param logic here with ComputeSignatureVTs
  assert(MFI->getParams().size() == Params.size() &&
         std::equal(MFI->getParams().begin(), MFI->getParams().end(),
                    Params.begin()));

  return Chain;
}

void WebAssemblyTargetLowering::ReplaceNodeResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  case ISD::SIGN_EXTEND_INREG:
    // Do not add any results, signifying that N should not be custom lowered
    // after all. This happens because simd128 turns on custom lowering for
    // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
    // illegal type.
    break;
  default:
    llvm_unreachable(
        "ReplaceNodeResults not implemented for this op for WebAssembly!");
  }
}

//===----------------------------------------------------------------------===//
// Custom lowering hooks.
//===----------------------------------------------------------------------===//

SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("unimplemented operation lowering");
    return SDValue();
  case ISD::FrameIndex:
    return LowerFrameIndex(Op, DAG);
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::ExternalSymbol:
    return LowerExternalSymbol(Op, DAG);
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG);
  case ISD::BR_JT:
    return LowerBR_JT(Op, DAG);
  case ISD::VASTART:
    return LowerVASTART(Op, DAG);
  case ISD::BlockAddress:
  case ISD::BRIND:
    fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
    return SDValue();
  case ISD::RETURNADDR:
    return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:
    return LowerFRAMEADDR(Op, DAG);
  case ISD::CopyToReg:
    return LowerCopyToReg(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
  case ISD::INSERT_VECTOR_ELT:
    return LowerAccessVectorElement(Op, DAG);
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_WO_CHAIN:
  case ISD::INTRINSIC_W_CHAIN:
    return LowerIntrinsic(Op, DAG);
  case ISD::SIGN_EXTEND_INREG:
    return LowerSIGN_EXTEND_INREG(Op, DAG);
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::SETCC:
    return LowerSETCC(Op, DAG);
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    return LowerShift(Op, DAG);
  }
}

SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDValue Src = Op.getOperand(2);
  if (isa<FrameIndexSDNode>(Src.getNode())) {
    // CopyToReg nodes don't support FrameIndex operands. Other targets select
    // the FI to some LEA-like instruction, but since we don't have that, we
    // need to insert some kind of instruction that can take an FI operand and
    // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
    // local.copy between Op and its FI operand.
    SDValue Chain = Op.getOperand(0);
    SDLoc DL(Op);
    unsigned Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
    EVT VT = Src.getValueType();
    SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
                                                   : WebAssembly::COPY_I64,
                                    DL, VT, Src),
                 0);
    return Op.getNode()->getNumValues() == 1
               ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
               : DAG.getCopyToReg(Chain, DL, Reg, Copy,
                                  Op.getNumOperands() == 4 ? Op.getOperand(3)
                                                           : SDValue());
  }
  return SDValue();
}

SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
                                                   SelectionDAG &DAG) const {
  int FI = cast<FrameIndexSDNode>(Op)->getIndex();
  return DAG.getTargetFrameIndex(FI, Op.getValueType());
}

SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);

  if (!Subtarget->getTargetTriple().isOSEmscripten()) {
    fail(DL, DAG,
         "Non-Emscripten WebAssembly hasn't implemented "
         "__builtin_return_address");
    return SDValue();
  }

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  MakeLibCallOptions CallOptions;
  return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
                     {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
      .first;
}

SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // Non-zero depths are not supported by WebAssembly currently. Use the
  // legalizer's default expansion, which is to return 0 (what this function is
  // documented to do).
  if (Op.getConstantOperandVal(0) > 0)
    return SDValue();

  DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
  EVT VT = Op.getValueType();
  Register FP =
      Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
  return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
}

SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const auto *GA = cast<GlobalAddressSDNode>(Op);
  EVT VT = Op.getValueType();
  assert(GA->getTargetFlags() == 0 &&
         "Unexpected target flags on generic GlobalAddressSDNode");
  if (GA->getAddressSpace() != 0)
    fail(DL, DAG, "WebAssembly only expects the 0 address space");

  unsigned OperandFlags = 0;
  if (isPositionIndependent()) {
    const GlobalValue *GV = GA->getGlobal();
    if (getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) {
      MachineFunction &MF = DAG.getMachineFunction();
      MVT PtrVT = getPointerTy(MF.getDataLayout());
      const char *BaseName;
      if (GV->getValueType()->isFunctionTy()) {
        BaseName = MF.createExternalSymbolName("__table_base");
        OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL;
      } else {
        BaseName = MF.createExternalSymbolName("__memory_base");
        OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL;
      }
      SDValue BaseAddr =
          DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
                      DAG.getTargetExternalSymbol(BaseName, PtrVT));

      SDValue SymAddr = DAG.getNode(
          WebAssemblyISD::WrapperPIC, DL, VT,
          DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
                                     OperandFlags));

      return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
    } else {
      OperandFlags = WebAssemblyII::MO_GOT;
    }
  }

  return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
                     DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
                                                GA->getOffset(), OperandFlags));
}

SDValue
WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const auto *ES = cast<ExternalSymbolSDNode>(Op);
  EVT VT = Op.getValueType();
  assert(ES->getTargetFlags() == 0 &&
         "Unexpected target flags on generic ExternalSymbolSDNode");
"Unexpected target flags on generic ExternalSymbolSDNode"); 1284 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT, 1285 DAG.getTargetExternalSymbol(ES->getSymbol(), VT)); 1286 } 1287 1288 SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op, 1289 SelectionDAG &DAG) const { 1290 // There's no need for a Wrapper node because we always incorporate a jump 1291 // table operand into a BR_TABLE instruction, rather than ever 1292 // materializing it in a register. 1293 const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 1294 return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(), 1295 JT->getTargetFlags()); 1296 } 1297 1298 SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op, 1299 SelectionDAG &DAG) const { 1300 SDLoc DL(Op); 1301 SDValue Chain = Op.getOperand(0); 1302 const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1)); 1303 SDValue Index = Op.getOperand(2); 1304 assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags"); 1305 1306 SmallVector<SDValue, 8> Ops; 1307 Ops.push_back(Chain); 1308 Ops.push_back(Index); 1309 1310 MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo(); 1311 const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs; 1312 1313 // Add an operand for each case. 1314 for (auto MBB : MBBs) 1315 Ops.push_back(DAG.getBasicBlock(MBB)); 1316 1317 // Add the first MBB as a dummy default target for now. This will be replaced 1318 // with the proper default target (and the preceding range check eliminated) 1319 // if possible by WebAssemblyFixBrTableDefaults. 1320 Ops.push_back(DAG.getBasicBlock(*MBBs.begin())); 1321 return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops); 1322 } 1323 1324 SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op, 1325 SelectionDAG &DAG) const { 1326 SDLoc DL(Op); 1327 EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout()); 1328 1329 auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>(); 1330 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 1331 1332 SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL, 1333 MFI->getVarargBufferVreg(), PtrVT); 1334 return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1), 1335 MachinePointerInfo(SV)); 1336 } 1337 1338 SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op, 1339 SelectionDAG &DAG) const { 1340 MachineFunction &MF = DAG.getMachineFunction(); 1341 unsigned IntNo; 1342 switch (Op.getOpcode()) { 1343 case ISD::INTRINSIC_VOID: 1344 case ISD::INTRINSIC_W_CHAIN: 1345 IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 1346 break; 1347 case ISD::INTRINSIC_WO_CHAIN: 1348 IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 1349 break; 1350 default: 1351 llvm_unreachable("Invalid intrinsic"); 1352 } 1353 SDLoc DL(Op); 1354 1355 switch (IntNo) { 1356 default: 1357 return SDValue(); // Don't custom lower most intrinsics. 
  case Intrinsic::wasm_lsda: {
    EVT VT = Op.getValueType();
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
    auto &Context = MF.getMMI().getContext();
    MCSymbol *S = Context.getOrCreateSymbol(Twine("GCC_except_table") +
                                            Twine(MF.getFunctionNumber()));
    return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
                       DAG.getMCSymbol(S, PtrVT));
  }

  case Intrinsic::wasm_throw: {
    // We only support C++ exceptions for now
    int Tag = cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
    if (Tag != CPP_EXCEPTION)
      llvm_unreachable("Invalid tag!");
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
    const char *SymName = MF.createExternalSymbolName("__cpp_exception");
    SDValue SymNode = DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
                                  DAG.getTargetExternalSymbol(SymName, PtrVT));
    return DAG.getNode(WebAssemblyISD::THROW, DL,
                       MVT::Other, // outchain type
                       {
                           Op.getOperand(0), // inchain
                           SymNode,          // exception symbol
                           Op.getOperand(3)  // thrown value
                       });
  }

  case Intrinsic::wasm_shuffle: {
    // Drop in-chain and replace undefs, but otherwise pass through unchanged
    SDValue Ops[18];
    size_t OpIdx = 0;
    Ops[OpIdx++] = Op.getOperand(1);
    Ops[OpIdx++] = Op.getOperand(2);
    while (OpIdx < 18) {
      const SDValue &MaskIdx = Op.getOperand(OpIdx + 1);
      if (MaskIdx.isUndef() ||
          cast<ConstantSDNode>(MaskIdx.getNode())->getZExtValue() >= 32) {
        Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32);
      } else {
        Ops[OpIdx++] = MaskIdx;
      }
    }
    return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
  }
  }
}

SDValue
WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // If sign extension operations are disabled, allow sext_inreg only if operand
  // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign
  // extension operations, but allowing sext_inreg in this context lets us have
  // simple patterns to select extract_lane_s instructions. Expanding sext_inreg
  // everywhere would be simpler in this file, but would necessitate large and
  // brittle patterns to undo the expansion and select extract_lane_s
  // instructions.
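  // For example, (sext_inreg (extract_vector_elt $v16i8, $i), i8) can then be
  // matched as a single i8x16.extract_lane_s.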
  assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
  if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    return SDValue();

  const SDValue &Extract = Op.getOperand(0);
  MVT VecT = Extract.getOperand(0).getSimpleValueType();
  if (VecT.getVectorElementType().getSizeInBits() > 32)
    return SDValue();
  MVT ExtractedLaneT =
      cast<VTSDNode>(Op.getOperand(1).getNode())->getVT().getSimpleVT();
  MVT ExtractedVecT =
      MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
  if (ExtractedVecT == VecT)
    return Op;

  // Bitcast vector to appropriate type to ensure ISel pattern coverage
  const SDNode *Index = Extract.getOperand(1).getNode();
  if (!isa<ConstantSDNode>(Index))
    return SDValue();
  unsigned IndexVal = cast<ConstantSDNode>(Index)->getZExtValue();
  unsigned Scale =
      ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
  assert(Scale > 1);
  SDValue NewIndex =
      DAG.getConstant(IndexVal * Scale, DL, Index->getValueType(0));
  SDValue NewExtract = DAG.getNode(
      ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(),
      DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract,
                     Op.getOperand(1));
}

SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const EVT VecT = Op.getValueType();
  const EVT LaneT = Op.getOperand(0).getValueType();
  const size_t Lanes = Op.getNumOperands();
  bool CanSwizzle = VecT == MVT::v16i8;

  // BUILD_VECTORs are lowered to the instruction that initializes the highest
  // possible number of lanes at once followed by a sequence of replace_lane
  // instructions to individually initialize any remaining lanes.

  // TODO: Tune this. For example, lanewise swizzling is very expensive, so
  // swizzled lanes should be given greater weight.

  // TODO: Investigate building vectors by shuffling together vectors built by
  // separately specialized means.

  auto IsConstant = [](const SDValue &V) {
    return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
  };

  // Returns the source vector and index vector pair if they exist. Checks for:

  auto IsConstant = [](const SDValue &V) {
    return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
  };

  // Returns the source vector and index vector pair if they exist. Checks for:
  //   (extract_vector_elt
  //     $src,
  //     (sign_extend_inreg (extract_vector_elt $indices, $i))
  //   )
  auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
    auto Bail = std::make_pair(SDValue(), SDValue());
    if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return Bail;
    const SDValue &SwizzleSrc = Lane->getOperand(0);
    const SDValue &IndexExt = Lane->getOperand(1);
    if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
      return Bail;
    const SDValue &Index = IndexExt->getOperand(0);
    if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return Bail;
    const SDValue &SwizzleIndices = Index->getOperand(0);
    if (SwizzleSrc.getValueType() != MVT::v16i8 ||
        SwizzleIndices.getValueType() != MVT::v16i8 ||
        Index->getOperand(1)->getOpcode() != ISD::Constant ||
        Index->getConstantOperandVal(1) != I)
      return Bail;
    return std::make_pair(SwizzleSrc, SwizzleIndices);
  };

  using ValueEntry = std::pair<SDValue, size_t>;
  SmallVector<ValueEntry, 16> SplatValueCounts;

  using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
  SmallVector<SwizzleEntry, 16> SwizzleCounts;

  auto AddCount = [](auto &Counts, const auto &Val) {
    auto CountIt = std::find_if(Counts.begin(), Counts.end(),
                                [&Val](auto E) { return E.first == Val; });
    if (CountIt == Counts.end()) {
      Counts.emplace_back(Val, 1);
    } else {
      CountIt->second++;
    }
  };

  auto GetMostCommon = [](auto &Counts) {
    auto CommonIt =
        std::max_element(Counts.begin(), Counts.end(),
                         [](auto A, auto B) { return A.second < B.second; });
    assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
    return *CommonIt;
  };

  size_t NumConstantLanes = 0;

  // Count eligible lanes for each type of vector creation op
  for (size_t I = 0; I < Lanes; ++I) {
    const SDValue &Lane = Op->getOperand(I);
    if (Lane.isUndef())
      continue;

    AddCount(SplatValueCounts, Lane);

    if (IsConstant(Lane)) {
      NumConstantLanes++;
    } else if (CanSwizzle) {
      auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
      if (SwizzleSrcs.first)
        AddCount(SwizzleCounts, SwizzleSrcs);
    }
  }

  SDValue SplatValue;
  size_t NumSplatLanes;
  std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);

  SDValue SwizzleSrc;
  SDValue SwizzleIndices;
  size_t NumSwizzleLanes = 0;
  if (SwizzleCounts.size())
    std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
                          NumSwizzleLanes) = GetMostCommon(SwizzleCounts);

  // Predicate returning true if the lane is properly initialized by the
  // original instruction
  std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
  SDValue Result;
  // Prefer swizzles over vector consts over splats
  if (NumSwizzleLanes >= NumSplatLanes &&
      (!Subtarget->hasUnimplementedSIMD128() ||
       NumSwizzleLanes >= NumConstantLanes)) {
    Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
                         SwizzleIndices);
    auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
    IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
      return Swizzled == GetSwizzleSrcs(I, Lane);
    };
  } else if (NumConstantLanes >= NumSplatLanes &&
             Subtarget->hasUnimplementedSIMD128()) {
    // If we support v128.const, emit it directly
    SmallVector<SDValue, 16> ConstLanes;
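    // Gather one value per lane: the lane's constant if it has one, otherwise
    // a zero of the lane type. Non-constant lanes are filled in later by the
    // replace_lane loop at the end of this function.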
    for (const SDValue &Lane : Op->op_values()) {
      if (IsConstant(Lane)) {
        ConstLanes.push_back(Lane);
      } else if (LaneT.isFloatingPoint()) {
        ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
      } else {
        ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
      }
    }
    Result = DAG.getBuildVector(VecT, DL, ConstLanes);
    IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
      return IsConstant(Lane);
    };
  } else if (NumConstantLanes >= NumSplatLanes && VecT.isInteger()) {
    // Otherwise, if this is an integer vector, pack the lane values together
    // so we can construct the 128-bit constant from a pair of i64s using a
    // splat followed by at most one i64x2.replace_lane. Also keep track of
    // the lanes that actually matter so we can avoid the replace_lane in more
    // cases.
    std::array<uint64_t, 2> I64s{{0, 0}};
    std::array<uint64_t, 2> ConstLaneMasks{{0, 0}};
    size_t LaneBits = 128 / Lanes;
    size_t HalfLanes = Lanes / 2;
    for (size_t I = 0; I < Lanes; ++I) {
      const SDValue &Lane = Op.getOperand(I);
      if (IsConstant(Lane)) {
        // How much we need to shift Val to position it in an i64
        auto Shift = LaneBits * (I % HalfLanes);
        auto Mask = maskTrailingOnes<uint64_t>(LaneBits);
        auto Val = cast<ConstantSDNode>(Lane.getNode())->getZExtValue() & Mask;
        I64s[I / HalfLanes] |= Val << Shift;
        ConstLaneMasks[I / HalfLanes] |= Mask << Shift;
      }
    }
    // Check whether all constant lanes in the second half of the vector are
    // equivalent to the corresponding lanes in the first half, or vice versa,
    // to determine whether splatting either side will be sufficient to
    // materialize the constant. As a special case, if the first and second
    // halves have no constant lanes in common, we can just combine them.
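    // Illustrative example: a v4i32 build_vector of <5, undef, 5, undef>
    // packs to I64s = {5, 5} and ConstLaneMasks = {0xffffffff, 0xffffffff},
    // so splatting either i64 half materializes every constant lane and no
    // i64x2.replace_lane is needed.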
    bool FirstHalfSufficient = (I64s[0] & ConstLaneMasks[1]) == I64s[1];
    bool SecondHalfSufficient = (I64s[1] & ConstLaneMasks[0]) == I64s[0];
    bool CombinedSufficient = (ConstLaneMasks[0] & ConstLaneMasks[1]) == 0;

    uint64_t Splatted;
    if (SecondHalfSufficient) {
      Splatted = I64s[1];
    } else if (CombinedSufficient) {
      Splatted = I64s[0] | I64s[1];
    } else {
      Splatted = I64s[0];
    }

    Result = DAG.getSplatBuildVector(MVT::v2i64, DL,
                                     DAG.getConstant(Splatted, DL, MVT::i64));
    if (!FirstHalfSufficient && !SecondHalfSufficient && !CombinedSufficient) {
      Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i64, Result,
                           DAG.getConstant(I64s[1], DL, MVT::i64),
                           DAG.getConstant(1, DL, MVT::i32));
    }
    Result = DAG.getBitcast(VecT, Result);
    IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
      return IsConstant(Lane);
    };
  } else {
    // Use a splat, but possibly a load_splat
    LoadSDNode *SplattedLoad;
    if ((SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) &&
        SplattedLoad->getMemoryVT() == VecT.getVectorElementType()) {
      Result = DAG.getMemIntrinsicNode(
          WebAssemblyISD::LOAD_SPLAT, DL, DAG.getVTList(VecT),
          {SplattedLoad->getChain(), SplattedLoad->getBasePtr(),
           SplattedLoad->getOffset()},
          SplattedLoad->getMemoryVT(), SplattedLoad->getMemOperand());
    } else {
      Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
    }
    IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
      return Lane == SplatValue;
    };
  }

  assert(Result);
  assert(IsLaneConstructed);

  // Add replace_lane instructions for any unhandled values
  for (size_t I = 0; I < Lanes; ++I) {
    const SDValue &Lane = Op->getOperand(I);
    if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
      Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
                           DAG.getConstant(I, DL, MVT::i32));
  }

  return Result;
}

SDValue
WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
  MVT VecType = Op.getOperand(0).getSimpleValueType();
  assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
  size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;

  // Space for two vector args and sixteen mask indices
  SDValue Ops[18];
  size_t OpIdx = 0;
  Ops[OpIdx++] = Op.getOperand(0);
  Ops[OpIdx++] = Op.getOperand(1);

  // Expand mask indices to byte indices and materialize them as operands. For
  // example, a v4i32 shuffle index of 5 (lane 1 of the second input) expands
  // to byte indices 20, 21, 22, and 23.
  for (int M : Mask) {
    for (size_t J = 0; J < LaneBytes; ++J) {
      // Lower undefs (represented by -1 in mask) to zero
      uint64_t ByteIndex = M == -1 ? 0 : (uint64_t)M * LaneBytes + J;
      Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
    }
  }

  return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
}

SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // The legalizer does not know how to expand the comparison modes of i64x2
  // vectors because no comparison modes are supported. We could solve this by
  // expanding all i64x2 SETCC nodes, but that seems to expand f64x2 SETCC
  // nodes (which return i64x2 results) as well.
  // So instead we manually unroll i64x2 comparisons here.
  assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
  SmallVector<SDValue, 2> LHS, RHS;
  DAG.ExtractVectorElements(Op->getOperand(0), LHS);
  DAG.ExtractVectorElements(Op->getOperand(1), RHS);
  const SDValue &CC = Op->getOperand(2);
  auto MakeLane = [&](unsigned I) {
    return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I],
                       DAG.getConstant(uint64_t(-1), DL, MVT::i64),
                       DAG.getConstant(uint64_t(0), DL, MVT::i64), CC);
  };
  return DAG.getBuildVector(Op->getValueType(0), DL,
                            {MakeLane(0), MakeLane(1)});
}

SDValue
WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
                                                    SelectionDAG &DAG) const {
  // Allow constant lane indices, expand variable lane indices
  SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
  if (isa<ConstantSDNode>(IdxNode) || IdxNode->isUndef())
    return Op;
  else
    // Perform default expansion
    return SDValue();
}

static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
  EVT LaneT = Op.getSimpleValueType().getVectorElementType();
  // 32-bit and 64-bit unrolled shifts will have proper semantics
  if (LaneT.bitsGE(MVT::i32))
    return DAG.UnrollVectorOp(Op.getNode());
  // Otherwise mask the shift value to get proper semantics from 32-bit shift
  SDLoc DL(Op);
  size_t NumLanes = Op.getSimpleValueType().getVectorNumElements();
  SDValue Mask = DAG.getConstant(LaneT.getSizeInBits() - 1, DL, MVT::i32);
  unsigned ShiftOpcode = Op.getOpcode();
  SmallVector<SDValue, 16> ShiftedElements;
  DAG.ExtractVectorElements(Op.getOperand(0), ShiftedElements, 0, 0, MVT::i32);
  SmallVector<SDValue, 16> ShiftElements;
  DAG.ExtractVectorElements(Op.getOperand(1), ShiftElements, 0, 0, MVT::i32);
  SmallVector<SDValue, 16> UnrolledOps;
  for (size_t i = 0; i < NumLanes; ++i) {
    SDValue MaskedShiftValue =
        DAG.getNode(ISD::AND, DL, MVT::i32, ShiftElements[i], Mask);
    SDValue ShiftedValue = ShiftedElements[i];
    if (ShiftOpcode == ISD::SRA)
      ShiftedValue = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32,
                                 ShiftedValue, DAG.getValueType(LaneT));
    UnrolledOps.push_back(
        DAG.getNode(ShiftOpcode, DL, MVT::i32, ShiftedValue, MaskedShiftValue));
  }
  return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps);
}

SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);

  // Only manually lower vector shifts
  assert(Op.getSimpleValueType().isVector());

  auto ShiftVal = DAG.getSplatValue(Op.getOperand(1));
  if (!ShiftVal)
    return unrollVectorShift(Op, DAG);

  // Use anyext because none of the high bits can affect the shift
  ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32);

  unsigned Opcode;
  switch (Op.getOpcode()) {
  case ISD::SHL:
    Opcode = WebAssemblyISD::VEC_SHL;
    break;
  case ISD::SRA:
    Opcode = WebAssemblyISD::VEC_SHR_S;
    break;
  case ISD::SRL:
    Opcode = WebAssemblyISD::VEC_SHR_U;
    break;
  default:
    llvm_unreachable("unexpected opcode");
  }

  // E.g. an ISD::SHL of v4i32 by a splat of $amt becomes (VEC_SHL $vec, $amt),
  // which selects to i32x4.shl with its scalar shift amount.
  return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal);
}

//===----------------------------------------------------------------------===//
// Custom DAG combine hooks
//===----------------------------------------------------------------------===//
static SDValue
performVECTOR_SHUFFLECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
  auto &DAG = DCI.DAG;
  auto Shuffle = cast<ShuffleVectorSDNode>(N);

  // Hoist vector bitcasts that don't change the number of lanes out of unary
  // shuffles, where they are less likely to get in the way of other combines.
  // (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) ->
  // (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask))))
  SDValue Bitcast = N->getOperand(0);
  if (Bitcast.getOpcode() != ISD::BITCAST)
    return SDValue();
  if (!N->getOperand(1).isUndef())
    return SDValue();
  SDValue CastOp = Bitcast.getOperand(0);
  MVT SrcType = CastOp.getSimpleValueType();
  MVT DstType = Bitcast.getSimpleValueType();
  if (!SrcType.is128BitVector() ||
      SrcType.getVectorNumElements() != DstType.getVectorNumElements())
    return SDValue();
  SDValue NewShuffle = DAG.getVectorShuffle(
      SrcType, SDLoc(N), CastOp, DAG.getUNDEF(SrcType), Shuffle->getMask());
  return DAG.getBitcast(DstType, NewShuffle);
}

static SDValue performVectorWidenCombine(SDNode *N,
                                         TargetLowering::DAGCombinerInfo &DCI) {
  auto &DAG = DCI.DAG;
  assert(N->getOpcode() == ISD::SIGN_EXTEND ||
         N->getOpcode() == ISD::ZERO_EXTEND);

  // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if
  // possible before the extract_subvector can be expanded. For example,
  //   (v8i16 (sign_extend (v8i8 (extract_subvector (v16i8 $v), 8))))
  // becomes (v8i16 (widen_high_s $v)), i.e. i16x8.widen_high_i8x16_s.
  auto Extract = N->getOperand(0);
  if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
    return SDValue();
  auto Source = Extract.getOperand(0);
  auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
  if (IndexNode == nullptr)
    return SDValue();
  auto Index = IndexNode->getZExtValue();

  // Only v8i8 and v4i16 extracts can be widened, and only if the extracted
  // subvector is the low or high half of its source.
  EVT ResVT = N->getValueType(0);
  if (ResVT == MVT::v8i16) {
    if (Extract.getValueType() != MVT::v8i8 ||
        Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8))
      return SDValue();
  } else if (ResVT == MVT::v4i32) {
    if (Extract.getValueType() != MVT::v4i16 ||
        Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4))
      return SDValue();
  } else {
    return SDValue();
  }

  bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND;
  bool IsLow = Index == 0;

  unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::WIDEN_LOW_S
                                : WebAssemblyISD::WIDEN_HIGH_S)
                       : (IsLow ? WebAssemblyISD::WIDEN_LOW_U
                                : WebAssemblyISD::WIDEN_HIGH_U);

  return DAG.getNode(Op, SDLoc(N), ResVT, Source);
}

SDValue
WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  switch (N->getOpcode()) {
  default:
    return SDValue();
  case ISD::VECTOR_SHUFFLE:
    return performVECTOR_SHUFFLECombine(N, DCI);
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
    return performVectorWidenCombine(N, DCI);
  }
}