1 //=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements the WebAssemblyTargetLowering class. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "WebAssemblyISelLowering.h" 15 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" 16 #include "Utils/WebAssemblyTypeUtilities.h" 17 #include "Utils/WebAssemblyUtilities.h" 18 #include "WebAssemblyMachineFunctionInfo.h" 19 #include "WebAssemblySubtarget.h" 20 #include "WebAssemblyTargetMachine.h" 21 #include "llvm/CodeGen/CallingConvLower.h" 22 #include "llvm/CodeGen/MachineInstrBuilder.h" 23 #include "llvm/CodeGen/MachineJumpTableInfo.h" 24 #include "llvm/CodeGen/MachineModuleInfo.h" 25 #include "llvm/CodeGen/MachineRegisterInfo.h" 26 #include "llvm/CodeGen/SelectionDAG.h" 27 #include "llvm/CodeGen/SelectionDAGNodes.h" 28 #include "llvm/IR/DiagnosticInfo.h" 29 #include "llvm/IR/DiagnosticPrinter.h" 30 #include "llvm/IR/Function.h" 31 #include "llvm/IR/Intrinsics.h" 32 #include "llvm/IR/IntrinsicsWebAssembly.h" 33 #include "llvm/Support/Debug.h" 34 #include "llvm/Support/ErrorHandling.h" 35 #include "llvm/Support/KnownBits.h" 36 #include "llvm/Support/MathExtras.h" 37 #include "llvm/Support/raw_ostream.h" 38 #include "llvm/Target/TargetOptions.h" 39 using namespace llvm; 40 41 #define DEBUG_TYPE "wasm-lower" 42 43 WebAssemblyTargetLowering::WebAssemblyTargetLowering( 44 const TargetMachine &TM, const WebAssemblySubtarget &STI) 45 : TargetLowering(TM), Subtarget(&STI) { 46 auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32; 47 48 // Booleans always contain 0 or 1. 49 setBooleanContents(ZeroOrOneBooleanContent); 50 // Except in SIMD vectors 51 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); 52 // We don't know the microarchitecture here, so just reduce register pressure. 53 setSchedulingPreference(Sched::RegPressure); 54 // Tell ISel that we have a stack pointer. 55 setStackPointerRegisterToSaveRestore( 56 Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32); 57 // Set up the register classes. 58 addRegisterClass(MVT::i32, &WebAssembly::I32RegClass); 59 addRegisterClass(MVT::i64, &WebAssembly::I64RegClass); 60 addRegisterClass(MVT::f32, &WebAssembly::F32RegClass); 61 addRegisterClass(MVT::f64, &WebAssembly::F64RegClass); 62 if (Subtarget->hasSIMD128()) { 63 addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass); 64 addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass); 65 addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass); 66 addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass); 67 addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass); 68 addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass); 69 } 70 if (Subtarget->hasReferenceTypes()) { 71 addRegisterClass(MVT::externref, &WebAssembly::EXTERNREFRegClass); 72 addRegisterClass(MVT::funcref, &WebAssembly::FUNCREFRegClass); 73 } 74 // Compute derived properties from the register classes. 75 computeRegisterProperties(Subtarget->getRegisterInfo()); 76 77 // Transform loads and stores to pointers in address space 1 to loads and 78 // stores to WebAssembly global variables, outside linear memory. 
  for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) {
    setOperationAction(ISD::LOAD, T, Custom);
    setOperationAction(ISD::STORE, T, Custom);
  }
  if (Subtarget->hasSIMD128()) {
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
                   MVT::v2f64}) {
      setOperationAction(ISD::LOAD, T, Custom);
      setOperationAction(ISD::STORE, T, Custom);
    }
  }
  if (Subtarget->hasReferenceTypes()) {
    // We need custom load and store lowering for externref, funcref, and
    // Other. The MVT::Other here represents tables of reference types.
    for (auto T : {MVT::externref, MVT::funcref, MVT::Other}) {
      setOperationAction(ISD::LOAD, T, Custom);
      setOperationAction(ISD::STORE, T, Custom);
    }
  }

  setOperationAction(ISD::GlobalAddress, MVTPtr, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVTPtr, Custom);
  setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom);
  setOperationAction(ISD::JumpTable, MVTPtr, Custom);
  setOperationAction(ISD::BlockAddress, MVTPtr, Custom);
  setOperationAction(ISD::BRIND, MVT::Other, Custom);

  // Take the default expansion for va_arg, va_copy, and va_end. There is no
  // default action for va_start, so we handle that one with custom lowering.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
    // Don't expand the floating-point types to constant pools.
    setOperationAction(ISD::ConstantFP, T, Legal);
    // Expand floating-point comparisons.
    for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
                    ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
      setCondCodeAction(CC, T, Expand);
    // Expand floating-point library function operators.
    for (auto Op :
         {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FMA})
      setOperationAction(Op, T, Expand);
    // Note supported floating-point library function operators that otherwise
    // default to expand.
    for (auto Op :
         {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT})
      setOperationAction(Op, T, Legal);
    // Support minimum and maximum, which otherwise default to expand.
    setOperationAction(ISD::FMINIMUM, T, Legal);
    setOperationAction(ISD::FMAXIMUM, T, Legal);
    // WebAssembly currently has no builtin f16 support.
    setOperationAction(ISD::FP16_TO_FP, T, Expand);
    setOperationAction(ISD::FP_TO_FP16, T, Expand);
    setLoadExtAction(ISD::EXTLOAD, T, MVT::f16, Expand);
    setTruncStoreAction(T, MVT::f16, Expand);
  }

  // Expand unavailable integer operations.
140 for (auto Op : 141 {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU, 142 ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS, ISD::SRA_PARTS, 143 ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}) { 144 for (auto T : {MVT::i32, MVT::i64}) 145 setOperationAction(Op, T, Expand); 146 if (Subtarget->hasSIMD128()) 147 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) 148 setOperationAction(Op, T, Expand); 149 } 150 151 if (Subtarget->hasNontrappingFPToInt()) 152 for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}) 153 for (auto T : {MVT::i32, MVT::i64}) 154 setOperationAction(Op, T, Custom); 155 156 // SIMD-specific configuration 157 if (Subtarget->hasSIMD128()) { 158 // Hoist bitcasts out of shuffles 159 setTargetDAGCombine(ISD::VECTOR_SHUFFLE); 160 161 // Combine extends of extract_subvectors into widening ops 162 setTargetDAGCombine(ISD::SIGN_EXTEND); 163 setTargetDAGCombine(ISD::ZERO_EXTEND); 164 165 // Combine int_to_fp or fp_extend of extract_vectors and vice versa into 166 // conversions ops 167 setTargetDAGCombine(ISD::SINT_TO_FP); 168 setTargetDAGCombine(ISD::UINT_TO_FP); 169 setTargetDAGCombine(ISD::FP_EXTEND); 170 setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR); 171 172 // Combine fp_to_{s,u}int_sat or fp_round of concat_vectors or vice versa 173 // into conversion ops 174 setTargetDAGCombine(ISD::FP_TO_SINT_SAT); 175 setTargetDAGCombine(ISD::FP_TO_UINT_SAT); 176 setTargetDAGCombine(ISD::FP_ROUND); 177 setTargetDAGCombine(ISD::CONCAT_VECTORS); 178 179 // Support saturating add for i8x16 and i16x8 180 for (auto Op : {ISD::SADDSAT, ISD::UADDSAT}) 181 for (auto T : {MVT::v16i8, MVT::v8i16}) 182 setOperationAction(Op, T, Legal); 183 184 // Support integer abs 185 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) 186 setOperationAction(ISD::ABS, T, Legal); 187 188 // Custom lower BUILD_VECTORs to minimize number of replace_lanes 189 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64, 190 MVT::v2f64}) 191 setOperationAction(ISD::BUILD_VECTOR, T, Custom); 192 193 // We have custom shuffle lowering to expose the shuffle mask 194 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64, 195 MVT::v2f64}) 196 setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom); 197 198 // Custom lowering since wasm shifts must have a scalar shift amount 199 for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL}) 200 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) 201 setOperationAction(Op, T, Custom); 202 203 // Custom lower lane accesses to expand out variable indices 204 for (auto Op : {ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT}) 205 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64, 206 MVT::v2f64}) 207 setOperationAction(Op, T, Custom); 208 209 // There is no i8x16.mul instruction 210 setOperationAction(ISD::MUL, MVT::v16i8, Expand); 211 212 // There is no vector conditional select instruction 213 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64, 214 MVT::v2f64}) 215 setOperationAction(ISD::SELECT_CC, T, Expand); 216 217 // Expand integer operations supported for scalars but not SIMD 218 for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP, ISD::SDIV, ISD::UDIV, 219 ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR}) 220 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) 221 setOperationAction(Op, T, Expand); 222 223 // But we do have integer min and max operations 224 for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) 225 for (auto T : 
           {MVT::v16i8, MVT::v8i16, MVT::v4i32})
        setOperationAction(Op, T, Legal);

    // And we have popcnt for i8x16
    setOperationAction(ISD::CTPOP, MVT::v16i8, Legal);

    // Expand float operations supported for scalars but not SIMD
    for (auto Op : {ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
                    ISD::FEXP, ISD::FEXP2, ISD::FRINT})
      for (auto T : {MVT::v4f32, MVT::v2f64})
        setOperationAction(Op, T, Expand);

    // Unsigned comparison operations are unavailable for i64x2 vectors.
    for (auto CC : {ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE})
      setCondCodeAction(CC, MVT::v2i64, Custom);

    // 64x2 conversions are not in the spec
    for (auto Op :
         {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT})
      for (auto T : {MVT::v2i64, MVT::v2f64})
        setOperationAction(Op, T, Expand);

    // But saturating fp_to_int conversions are supported
    for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT})
      setOperationAction(Op, MVT::v4i32, Custom);
  }

  // As a special case, these operators use the type to mean the type to
  // sign-extend from.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  if (!Subtarget->hasSignExt()) {
    // Sign extends are legal only when extending a vector extract
    auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
    for (auto T : {MVT::i8, MVT::i16, MVT::i32})
      setOperationAction(ISD::SIGN_EXTEND_INREG, T, Action);
  }
  for (auto T : MVT::integer_fixedlen_vector_valuetypes())
    setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand);

  // Dynamic stack allocation: use the default expansion.
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand);

  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
  setOperationAction(ISD::FrameIndex, MVT::i64, Custom);
  setOperationAction(ISD::CopyToReg, MVT::Other, Custom);

  // Expand these forms; we pattern-match the forms that we can handle in isel.
  for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
    for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
      setOperationAction(Op, T, Expand);

  // We have custom switch handling.
  setOperationAction(ISD::BR_JT, MVT::Other, Custom);

  // WebAssembly doesn't have:
  // - Floating-point extending loads.
  // - Floating-point truncating stores.
  // - i1 extending loads.
285 // - truncating SIMD stores and most extending loads 286 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); 287 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 288 for (auto T : MVT::integer_valuetypes()) 289 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD}) 290 setLoadExtAction(Ext, T, MVT::i1, Promote); 291 if (Subtarget->hasSIMD128()) { 292 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, 293 MVT::v2f64}) { 294 for (auto MemT : MVT::fixedlen_vector_valuetypes()) { 295 if (MVT(T) != MemT) { 296 setTruncStoreAction(T, MemT, Expand); 297 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD}) 298 setLoadExtAction(Ext, T, MemT, Expand); 299 } 300 } 301 } 302 // But some vector extending loads are legal 303 for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) { 304 setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal); 305 setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal); 306 setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal); 307 } 308 setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Legal); 309 } 310 311 // Don't do anything clever with build_pairs 312 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); 313 314 // Trap lowers to wasm unreachable 315 setOperationAction(ISD::TRAP, MVT::Other, Legal); 316 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); 317 318 // Exception handling intrinsics 319 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 320 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); 321 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); 322 323 setMaxAtomicSizeInBitsSupported(64); 324 325 // Override the __gnu_f2h_ieee/__gnu_h2f_ieee names so that the f32 name is 326 // consistent with the f64 and f128 names. 327 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2"); 328 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2"); 329 330 // Define the emscripten name for return address helper. 331 // TODO: when implementing other Wasm backends, make this generic or only do 332 // this on emscripten depending on what they end up doing. 333 setLibcallName(RTLIB::RETURN_ADDRESS, "emscripten_return_address"); 334 335 // Always convert switches to br_tables unless there is only one case, which 336 // is equivalent to a simple branch. This reduces code size for wasm, and we 337 // defer possible jump table optimizations to the VM. 
338 setMinimumJumpTableEntries(2); 339 } 340 341 MVT WebAssemblyTargetLowering::getPointerTy(const DataLayout &DL, 342 uint32_t AS) const { 343 if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF) 344 return MVT::externref; 345 if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF) 346 return MVT::funcref; 347 return TargetLowering::getPointerTy(DL, AS); 348 } 349 350 MVT WebAssemblyTargetLowering::getPointerMemTy(const DataLayout &DL, 351 uint32_t AS) const { 352 if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF) 353 return MVT::externref; 354 if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF) 355 return MVT::funcref; 356 return TargetLowering::getPointerMemTy(DL, AS); 357 } 358 359 TargetLowering::AtomicExpansionKind 360 WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { 361 // We have wasm instructions for these 362 switch (AI->getOperation()) { 363 case AtomicRMWInst::Add: 364 case AtomicRMWInst::Sub: 365 case AtomicRMWInst::And: 366 case AtomicRMWInst::Or: 367 case AtomicRMWInst::Xor: 368 case AtomicRMWInst::Xchg: 369 return AtomicExpansionKind::None; 370 default: 371 break; 372 } 373 return AtomicExpansionKind::CmpXChg; 374 } 375 376 bool WebAssemblyTargetLowering::shouldScalarizeBinop(SDValue VecOp) const { 377 // Implementation copied from X86TargetLowering. 378 unsigned Opc = VecOp.getOpcode(); 379 380 // Assume target opcodes can't be scalarized. 381 // TODO - do we have any exceptions? 382 if (Opc >= ISD::BUILTIN_OP_END) 383 return false; 384 385 // If the vector op is not supported, try to convert to scalar. 386 EVT VecVT = VecOp.getValueType(); 387 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT)) 388 return true; 389 390 // If the vector op is supported, but the scalar op is not, the transform may 391 // not be worthwhile. 392 EVT ScalarVT = VecVT.getScalarType(); 393 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT); 394 } 395 396 FastISel *WebAssemblyTargetLowering::createFastISel( 397 FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const { 398 return WebAssembly::createFastISel(FuncInfo, LibInfo); 399 } 400 401 MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/, 402 EVT VT) const { 403 unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1); 404 if (BitWidth > 1 && BitWidth < 8) 405 BitWidth = 8; 406 407 if (BitWidth > 64) { 408 // The shift will be lowered to a libcall, and compiler-rt libcalls expect 409 // the count to be an i32. 410 BitWidth = 32; 411 assert(BitWidth >= Log2_32_Ceil(VT.getSizeInBits()) && 412 "32-bit shift counts ought to be enough for anyone"); 413 } 414 415 MVT Result = MVT::getIntegerVT(BitWidth); 416 assert(Result != MVT::INVALID_SIMPLE_VALUE_TYPE && 417 "Unable to represent scalar shift amount type"); 418 return Result; 419 } 420 421 // Lower an fp-to-int conversion operator from the LLVM opcode, which has an 422 // undefined result on invalid/overflow, to the WebAssembly opcode, which 423 // traps on invalid/overflow. 424 static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL, 425 MachineBasicBlock *BB, 426 const TargetInstrInfo &TII, 427 bool IsUnsigned, bool Int64, 428 bool Float64, unsigned LoweredOpcode) { 429 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 430 431 Register OutReg = MI.getOperand(0).getReg(); 432 Register InReg = MI.getOperand(1).getReg(); 433 434 unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32; 435 unsigned FConst = Float64 ? 
WebAssembly::CONST_F64 : WebAssembly::CONST_F32; 436 unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32; 437 unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32; 438 unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32; 439 unsigned Eqz = WebAssembly::EQZ_I32; 440 unsigned And = WebAssembly::AND_I32; 441 int64_t Limit = Int64 ? INT64_MIN : INT32_MIN; 442 int64_t Substitute = IsUnsigned ? 0 : Limit; 443 double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit; 444 auto &Context = BB->getParent()->getFunction().getContext(); 445 Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context); 446 447 const BasicBlock *LLVMBB = BB->getBasicBlock(); 448 MachineFunction *F = BB->getParent(); 449 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB); 450 MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB); 451 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB); 452 453 MachineFunction::iterator It = ++BB->getIterator(); 454 F->insert(It, FalseMBB); 455 F->insert(It, TrueMBB); 456 F->insert(It, DoneMBB); 457 458 // Transfer the remainder of BB and its successor edges to DoneMBB. 459 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end()); 460 DoneMBB->transferSuccessorsAndUpdatePHIs(BB); 461 462 BB->addSuccessor(TrueMBB); 463 BB->addSuccessor(FalseMBB); 464 TrueMBB->addSuccessor(DoneMBB); 465 FalseMBB->addSuccessor(DoneMBB); 466 467 unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg; 468 Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg)); 469 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg)); 470 CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); 471 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); 472 FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg)); 473 TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg)); 474 475 MI.eraseFromParent(); 476 // For signed numbers, we can do a single comparison to determine whether 477 // fabs(x) is within range. 478 if (IsUnsigned) { 479 Tmp0 = InReg; 480 } else { 481 BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg); 482 } 483 BuildMI(BB, DL, TII.get(FConst), Tmp1) 484 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal))); 485 BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1); 486 487 // For unsigned numbers, we have to do a separate comparison with zero. 488 if (IsUnsigned) { 489 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg)); 490 Register SecondCmpReg = 491 MRI.createVirtualRegister(&WebAssembly::I32RegClass); 492 Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); 493 BuildMI(BB, DL, TII.get(FConst), Tmp1) 494 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0))); 495 BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1); 496 BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg); 497 CmpReg = AndReg; 498 } 499 500 BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg); 501 502 // Create the CFG diamond to select between doing the conversion or using 503 // the substitute value. 
  BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
  BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
  BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
  BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
  BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
      .addReg(FalseReg)
      .addMBB(FalseMBB)
      .addReg(TrueReg)
      .addMBB(TrueMBB);

  return DoneMBB;
}

static MachineBasicBlock *
LowerCallResults(MachineInstr &CallResults, DebugLoc DL, MachineBasicBlock *BB,
                 const WebAssemblySubtarget *Subtarget,
                 const TargetInstrInfo &TII) {
  MachineInstr &CallParams = *CallResults.getPrevNode();
  assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS);
  assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS ||
         CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS);

  bool IsIndirect = CallParams.getOperand(0).isReg();
  bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS;

  bool IsFuncrefCall = false;
  if (IsIndirect) {
    Register Reg = CallParams.getOperand(0).getReg();
    const MachineFunction *MF = BB->getParent();
    const MachineRegisterInfo &MRI = MF->getRegInfo();
    const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
    IsFuncrefCall = (TRC == &WebAssembly::FUNCREFRegClass);
    assert(!IsFuncrefCall || Subtarget->hasReferenceTypes());
  }

  unsigned CallOp;
  if (IsIndirect && IsRetCall) {
    CallOp = WebAssembly::RET_CALL_INDIRECT;
  } else if (IsIndirect) {
    CallOp = WebAssembly::CALL_INDIRECT;
  } else if (IsRetCall) {
    CallOp = WebAssembly::RET_CALL;
  } else {
    CallOp = WebAssembly::CALL;
  }

  MachineFunction &MF = *BB->getParent();
  const MCInstrDesc &MCID = TII.get(CallOp);
  MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));

  // See if we must truncate the function pointer.
  // CALL_INDIRECT takes an i32, but in wasm64 we represent function pointers
  // as 64-bit for uniformity with other pointer types.
  // See also: WebAssemblyFastISel::selectCall
  if (IsIndirect && MF.getSubtarget<WebAssemblySubtarget>().hasAddr64()) {
    Register Reg32 =
        MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
    auto &FnPtr = CallParams.getOperand(0);
    BuildMI(*BB, CallResults.getIterator(), DL,
            TII.get(WebAssembly::I32_WRAP_I64), Reg32)
        .addReg(FnPtr.getReg());
    FnPtr.setReg(Reg32);
  }

  // Move the function pointer to the end of the arguments for indirect calls
  if (IsIndirect) {
    auto FnPtr = CallParams.getOperand(0);
    CallParams.RemoveOperand(0);

    // For funcrefs, call_indirect is done through __funcref_call_table, and
    // the funcref is always installed in slot 0 of the table. Therefore,
    // instead of adding the function pointer at the end of the params list, we
    // add a zero (the index into __funcref_call_table).
    if (IsFuncrefCall) {
      Register RegZero =
          MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
      MachineInstrBuilder MIBC0 =
          BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);

      BB->insert(CallResults.getIterator(), MIBC0);
      MachineInstrBuilder(MF, CallParams).addReg(RegZero);
    } else
      CallParams.addOperand(FnPtr);
  }

  for (auto Def : CallResults.defs())
    MIB.add(Def);

  if (IsIndirect) {
    // Placeholder for the type index.
594 MIB.addImm(0); 595 // The table into which this call_indirect indexes. 596 MCSymbolWasm *Table = IsFuncrefCall 597 ? WebAssembly::getOrCreateFuncrefCallTableSymbol( 598 MF.getContext(), Subtarget) 599 : WebAssembly::getOrCreateFunctionTableSymbol( 600 MF.getContext(), Subtarget); 601 if (Subtarget->hasReferenceTypes()) { 602 MIB.addSym(Table); 603 } else { 604 // For the MVP there is at most one table whose number is 0, but we can't 605 // write a table symbol or issue relocations. Instead we just ensure the 606 // table is live and write a zero. 607 Table->setNoStrip(); 608 MIB.addImm(0); 609 } 610 } 611 612 for (auto Use : CallParams.uses()) 613 MIB.add(Use); 614 615 BB->insert(CallResults.getIterator(), MIB); 616 CallParams.eraseFromParent(); 617 CallResults.eraseFromParent(); 618 619 // If this is a funcref call, to avoid hidden GC roots, we need to clear the 620 // table slot with ref.null upon call_indirect return. 621 // 622 // This generates the following code, which comes right after a call_indirect 623 // of a funcref: 624 // 625 // i32.const 0 626 // ref.null func 627 // table.set __funcref_call_table 628 if (IsIndirect && IsFuncrefCall) { 629 MCSymbolWasm *Table = WebAssembly::getOrCreateFuncrefCallTableSymbol( 630 MF.getContext(), Subtarget); 631 Register RegZero = 632 MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass); 633 MachineInstr *Const0 = 634 BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0); 635 BB->insertAfter(MIB.getInstr()->getIterator(), Const0); 636 637 Register RegFuncref = 638 MF.getRegInfo().createVirtualRegister(&WebAssembly::FUNCREFRegClass); 639 MachineInstr *RefNull = 640 BuildMI(MF, DL, TII.get(WebAssembly::REF_NULL_FUNCREF), RegFuncref) 641 .addImm(static_cast<int32_t>(WebAssembly::HeapType::Funcref)); 642 BB->insertAfter(Const0->getIterator(), RefNull); 643 644 MachineInstr *TableSet = 645 BuildMI(MF, DL, TII.get(WebAssembly::TABLE_SET_FUNCREF)) 646 .addSym(Table) 647 .addReg(RegZero) 648 .addReg(RegFuncref); 649 BB->insertAfter(RefNull->getIterator(), TableSet); 650 } 651 652 return BB; 653 } 654 655 MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter( 656 MachineInstr &MI, MachineBasicBlock *BB) const { 657 const TargetInstrInfo &TII = *Subtarget->getInstrInfo(); 658 DebugLoc DL = MI.getDebugLoc(); 659 660 switch (MI.getOpcode()) { 661 default: 662 llvm_unreachable("Unexpected instr type to insert"); 663 case WebAssembly::FP_TO_SINT_I32_F32: 664 return LowerFPToInt(MI, DL, BB, TII, false, false, false, 665 WebAssembly::I32_TRUNC_S_F32); 666 case WebAssembly::FP_TO_UINT_I32_F32: 667 return LowerFPToInt(MI, DL, BB, TII, true, false, false, 668 WebAssembly::I32_TRUNC_U_F32); 669 case WebAssembly::FP_TO_SINT_I64_F32: 670 return LowerFPToInt(MI, DL, BB, TII, false, true, false, 671 WebAssembly::I64_TRUNC_S_F32); 672 case WebAssembly::FP_TO_UINT_I64_F32: 673 return LowerFPToInt(MI, DL, BB, TII, true, true, false, 674 WebAssembly::I64_TRUNC_U_F32); 675 case WebAssembly::FP_TO_SINT_I32_F64: 676 return LowerFPToInt(MI, DL, BB, TII, false, false, true, 677 WebAssembly::I32_TRUNC_S_F64); 678 case WebAssembly::FP_TO_UINT_I32_F64: 679 return LowerFPToInt(MI, DL, BB, TII, true, false, true, 680 WebAssembly::I32_TRUNC_U_F64); 681 case WebAssembly::FP_TO_SINT_I64_F64: 682 return LowerFPToInt(MI, DL, BB, TII, false, true, true, 683 WebAssembly::I64_TRUNC_S_F64); 684 case WebAssembly::FP_TO_UINT_I64_F64: 685 return LowerFPToInt(MI, DL, BB, TII, true, true, true, 686 WebAssembly::I64_TRUNC_U_F64); 687 case 
WebAssembly::CALL_RESULTS: 688 case WebAssembly::RET_CALL_RESULTS: 689 return LowerCallResults(MI, DL, BB, Subtarget, TII); 690 } 691 } 692 693 const char * 694 WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const { 695 switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) { 696 case WebAssemblyISD::FIRST_NUMBER: 697 case WebAssemblyISD::FIRST_MEM_OPCODE: 698 break; 699 #define HANDLE_NODETYPE(NODE) \ 700 case WebAssemblyISD::NODE: \ 701 return "WebAssemblyISD::" #NODE; 702 #define HANDLE_MEM_NODETYPE(NODE) HANDLE_NODETYPE(NODE) 703 #include "WebAssemblyISD.def" 704 #undef HANDLE_MEM_NODETYPE 705 #undef HANDLE_NODETYPE 706 } 707 return nullptr; 708 } 709 710 std::pair<unsigned, const TargetRegisterClass *> 711 WebAssemblyTargetLowering::getRegForInlineAsmConstraint( 712 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { 713 // First, see if this is a constraint that directly corresponds to a 714 // WebAssembly register class. 715 if (Constraint.size() == 1) { 716 switch (Constraint[0]) { 717 case 'r': 718 assert(VT != MVT::iPTR && "Pointer MVT not expected here"); 719 if (Subtarget->hasSIMD128() && VT.isVector()) { 720 if (VT.getSizeInBits() == 128) 721 return std::make_pair(0U, &WebAssembly::V128RegClass); 722 } 723 if (VT.isInteger() && !VT.isVector()) { 724 if (VT.getSizeInBits() <= 32) 725 return std::make_pair(0U, &WebAssembly::I32RegClass); 726 if (VT.getSizeInBits() <= 64) 727 return std::make_pair(0U, &WebAssembly::I64RegClass); 728 } 729 if (VT.isFloatingPoint() && !VT.isVector()) { 730 switch (VT.getSizeInBits()) { 731 case 32: 732 return std::make_pair(0U, &WebAssembly::F32RegClass); 733 case 64: 734 return std::make_pair(0U, &WebAssembly::F64RegClass); 735 default: 736 break; 737 } 738 } 739 break; 740 default: 741 break; 742 } 743 } 744 745 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 746 } 747 748 bool WebAssemblyTargetLowering::isCheapToSpeculateCttz() const { 749 // Assume ctz is a relatively cheap operation. 750 return true; 751 } 752 753 bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const { 754 // Assume clz is a relatively cheap operation. 755 return true; 756 } 757 758 bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL, 759 const AddrMode &AM, 760 Type *Ty, unsigned AS, 761 Instruction *I) const { 762 // WebAssembly offsets are added as unsigned without wrapping. The 763 // isLegalAddressingMode gives us no way to determine if wrapping could be 764 // happening, so we approximate this by accepting only non-negative offsets. 765 if (AM.BaseOffs < 0) 766 return false; 767 768 // WebAssembly has no scale register operands. 769 if (AM.Scale != 0) 770 return false; 771 772 // Everything else is legal. 773 return true; 774 } 775 776 bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses( 777 EVT /*VT*/, unsigned /*AddrSpace*/, Align /*Align*/, 778 MachineMemOperand::Flags /*Flags*/, bool *Fast) const { 779 // WebAssembly supports unaligned accesses, though it should be declared 780 // with the p2align attribute on loads and stores which do so, and there 781 // may be a performance impact. We tell LLVM they're "fast" because 782 // for the kinds of things that LLVM uses this for (merging adjacent stores 783 // of constants, etc.), WebAssembly implementations will either want the 784 // unaligned access or they'll split anyway. 
785 if (Fast) 786 *Fast = true; 787 return true; 788 } 789 790 bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT, 791 AttributeList Attr) const { 792 // The current thinking is that wasm engines will perform this optimization, 793 // so we can save on code size. 794 return true; 795 } 796 797 bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const { 798 EVT ExtT = ExtVal.getValueType(); 799 EVT MemT = cast<LoadSDNode>(ExtVal->getOperand(0))->getValueType(0); 800 return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) || 801 (ExtT == MVT::v4i32 && MemT == MVT::v4i16) || 802 (ExtT == MVT::v2i64 && MemT == MVT::v2i32); 803 } 804 805 bool WebAssemblyTargetLowering::isOffsetFoldingLegal( 806 const GlobalAddressSDNode *GA) const { 807 // Wasm doesn't support function addresses with offsets 808 const GlobalValue *GV = GA->getGlobal(); 809 return isa<Function>(GV) ? false : TargetLowering::isOffsetFoldingLegal(GA); 810 } 811 812 EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL, 813 LLVMContext &C, 814 EVT VT) const { 815 if (VT.isVector()) 816 return VT.changeVectorElementTypeToInteger(); 817 818 // So far, all branch instructions in Wasm take an I32 condition. 819 // The default TargetLowering::getSetCCResultType returns the pointer size, 820 // which would be useful to reduce instruction counts when testing 821 // against 64-bit pointers/values if at some point Wasm supports that. 822 return EVT::getIntegerVT(C, 32); 823 } 824 825 bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 826 const CallInst &I, 827 MachineFunction &MF, 828 unsigned Intrinsic) const { 829 switch (Intrinsic) { 830 case Intrinsic::wasm_memory_atomic_notify: 831 Info.opc = ISD::INTRINSIC_W_CHAIN; 832 Info.memVT = MVT::i32; 833 Info.ptrVal = I.getArgOperand(0); 834 Info.offset = 0; 835 Info.align = Align(4); 836 // atomic.notify instruction does not really load the memory specified with 837 // this argument, but MachineMemOperand should either be load or store, so 838 // we set this to a load. 839 // FIXME Volatile isn't really correct, but currently all LLVM atomic 840 // instructions are treated as volatiles in the backend, so we should be 841 // consistent. The same applies for wasm_atomic_wait intrinsics too. 
842 Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad; 843 return true; 844 case Intrinsic::wasm_memory_atomic_wait32: 845 Info.opc = ISD::INTRINSIC_W_CHAIN; 846 Info.memVT = MVT::i32; 847 Info.ptrVal = I.getArgOperand(0); 848 Info.offset = 0; 849 Info.align = Align(4); 850 Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad; 851 return true; 852 case Intrinsic::wasm_memory_atomic_wait64: 853 Info.opc = ISD::INTRINSIC_W_CHAIN; 854 Info.memVT = MVT::i64; 855 Info.ptrVal = I.getArgOperand(0); 856 Info.offset = 0; 857 Info.align = Align(8); 858 Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad; 859 return true; 860 default: 861 return false; 862 } 863 } 864 865 void WebAssemblyTargetLowering::computeKnownBitsForTargetNode( 866 const SDValue Op, KnownBits &Known, const APInt &DemandedElts, 867 const SelectionDAG &DAG, unsigned Depth) const { 868 switch (Op.getOpcode()) { 869 default: 870 break; 871 case ISD::INTRINSIC_WO_CHAIN: { 872 unsigned IntNo = Op.getConstantOperandVal(0); 873 switch (IntNo) { 874 default: 875 break; 876 case Intrinsic::wasm_bitmask: { 877 unsigned BitWidth = Known.getBitWidth(); 878 EVT VT = Op.getOperand(1).getSimpleValueType(); 879 unsigned PossibleBits = VT.getVectorNumElements(); 880 APInt ZeroMask = APInt::getHighBitsSet(BitWidth, BitWidth - PossibleBits); 881 Known.Zero |= ZeroMask; 882 break; 883 } 884 } 885 } 886 } 887 } 888 889 TargetLoweringBase::LegalizeTypeAction 890 WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const { 891 if (VT.isFixedLengthVector()) { 892 MVT EltVT = VT.getVectorElementType(); 893 // We have legal vector types with these lane types, so widening the 894 // vector would let us use some of the lanes directly without having to 895 // extend or truncate values. 896 if (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 || 897 EltVT == MVT::i64 || EltVT == MVT::f32 || EltVT == MVT::f64) 898 return TypeWidenVector; 899 } 900 901 return TargetLoweringBase::getPreferredVectorAction(VT); 902 } 903 904 //===----------------------------------------------------------------------===// 905 // WebAssembly Lowering private implementation. 906 //===----------------------------------------------------------------------===// 907 908 //===----------------------------------------------------------------------===// 909 // Lowering Code 910 //===----------------------------------------------------------------------===// 911 912 static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) { 913 MachineFunction &MF = DAG.getMachineFunction(); 914 DAG.getContext()->diagnose( 915 DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc())); 916 } 917 918 // Test whether the given calling convention is supported. 919 static bool callingConvSupported(CallingConv::ID CallConv) { 920 // We currently support the language-independent target-independent 921 // conventions. We don't yet have a way to annotate calls with properties like 922 // "cold", and we don't have any call-clobbered registers, so these are mostly 923 // all handled the same. 
924 return CallConv == CallingConv::C || CallConv == CallingConv::Fast || 925 CallConv == CallingConv::Cold || 926 CallConv == CallingConv::PreserveMost || 927 CallConv == CallingConv::PreserveAll || 928 CallConv == CallingConv::CXX_FAST_TLS || 929 CallConv == CallingConv::WASM_EmscriptenInvoke || 930 CallConv == CallingConv::Swift; 931 } 932 933 SDValue 934 WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, 935 SmallVectorImpl<SDValue> &InVals) const { 936 SelectionDAG &DAG = CLI.DAG; 937 SDLoc DL = CLI.DL; 938 SDValue Chain = CLI.Chain; 939 SDValue Callee = CLI.Callee; 940 MachineFunction &MF = DAG.getMachineFunction(); 941 auto Layout = MF.getDataLayout(); 942 943 CallingConv::ID CallConv = CLI.CallConv; 944 if (!callingConvSupported(CallConv)) 945 fail(DL, DAG, 946 "WebAssembly doesn't support language-specific or target-specific " 947 "calling conventions yet"); 948 if (CLI.IsPatchPoint) 949 fail(DL, DAG, "WebAssembly doesn't support patch point yet"); 950 951 if (CLI.IsTailCall) { 952 auto NoTail = [&](const char *Msg) { 953 if (CLI.CB && CLI.CB->isMustTailCall()) 954 fail(DL, DAG, Msg); 955 CLI.IsTailCall = false; 956 }; 957 958 if (!Subtarget->hasTailCall()) 959 NoTail("WebAssembly 'tail-call' feature not enabled"); 960 961 // Varargs calls cannot be tail calls because the buffer is on the stack 962 if (CLI.IsVarArg) 963 NoTail("WebAssembly does not support varargs tail calls"); 964 965 // Do not tail call unless caller and callee return types match 966 const Function &F = MF.getFunction(); 967 const TargetMachine &TM = getTargetMachine(); 968 Type *RetTy = F.getReturnType(); 969 SmallVector<MVT, 4> CallerRetTys; 970 SmallVector<MVT, 4> CalleeRetTys; 971 computeLegalValueVTs(F, TM, RetTy, CallerRetTys); 972 computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys); 973 bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() && 974 std::equal(CallerRetTys.begin(), CallerRetTys.end(), 975 CalleeRetTys.begin()); 976 if (!TypesMatch) 977 NoTail("WebAssembly tail call requires caller and callee return types to " 978 "match"); 979 980 // If pointers to local stack values are passed, we cannot tail call 981 if (CLI.CB) { 982 for (auto &Arg : CLI.CB->args()) { 983 Value *Val = Arg.get(); 984 // Trace the value back through pointer operations 985 while (true) { 986 Value *Src = Val->stripPointerCastsAndAliases(); 987 if (auto *GEP = dyn_cast<GetElementPtrInst>(Src)) 988 Src = GEP->getPointerOperand(); 989 if (Val == Src) 990 break; 991 Val = Src; 992 } 993 if (isa<AllocaInst>(Val)) { 994 NoTail( 995 "WebAssembly does not support tail calling with stack arguments"); 996 break; 997 } 998 } 999 } 1000 } 1001 1002 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 1003 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 1004 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 1005 1006 // The generic code may have added an sret argument. If we're lowering an 1007 // invoke function, the ABI requires that the function pointer be the first 1008 // argument, so we may have to swap the arguments. 
  if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
      Outs[0].Flags.isSRet()) {
    std::swap(Outs[0], Outs[1]);
    std::swap(OutVals[0], OutVals[1]);
  }

  bool HasSwiftSelfArg = false;
  bool HasSwiftErrorArg = false;
  unsigned NumFixedArgs = 0;
  for (unsigned I = 0; I < Outs.size(); ++I) {
    const ISD::OutputArg &Out = Outs[I];
    SDValue &OutVal = OutVals[I];
    HasSwiftSelfArg |= Out.Flags.isSwiftSelf();
    HasSwiftErrorArg |= Out.Flags.isSwiftError();
    if (Out.Flags.isNest())
      fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
    if (Out.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
    if (Out.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
    if (Out.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
    if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
      auto &MFI = MF.getFrameInfo();
      int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
                                     Out.Flags.getNonZeroByValAlign(),
                                     /*isSS=*/false);
      SDValue SizeNode =
          DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
      SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
      Chain = DAG.getMemcpy(
          Chain, DL, FINode, OutVal, SizeNode, Out.Flags.getNonZeroByValAlign(),
          /*isVolatile*/ false, /*AlwaysInline=*/false,
          /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
      OutVal = FINode;
    }
    // Count the number of fixed args *after* legalization.
    NumFixedArgs += Out.IsFixed;
  }

  bool IsVarArg = CLI.IsVarArg;
  auto PtrVT = getPointerTy(Layout);

  // For swiftcc, emit additional swiftself and swifterror arguments if they
  // are not already present. These additional arguments are also added to the
  // callee's signature; they are necessary to match caller and callee
  // signatures for indirect calls.
  if (CallConv == CallingConv::Swift) {
    if (!HasSwiftSelfArg) {
      NumFixedArgs++;
      ISD::OutputArg Arg;
      Arg.Flags.setSwiftSelf();
      CLI.Outs.push_back(Arg);
      SDValue ArgVal = DAG.getUNDEF(PtrVT);
      CLI.OutVals.push_back(ArgVal);
    }
    if (!HasSwiftErrorArg) {
      NumFixedArgs++;
      ISD::OutputArg Arg;
      Arg.Flags.setSwiftError();
      CLI.Outs.push_back(Arg);
      SDValue ArgVal = DAG.getUNDEF(PtrVT);
      CLI.OutVals.push_back(ArgVal);
    }
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (IsVarArg) {
    // Outgoing non-fixed arguments are placed in a buffer. First
    // compute their offsets and the total amount of buffer space needed.
    for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
      const ISD::OutputArg &Out = Outs[I];
      SDValue &Arg = OutVals[I];
      EVT VT = Arg.getValueType();
      assert(VT != MVT::iPTR && "Legalized args should be concrete");
      Type *Ty = VT.getTypeForEVT(*DAG.getContext());
      Align Alignment =
          std::max(Out.Flags.getNonZeroOrigAlign(), Layout.getABITypeAlign(Ty));
      unsigned Offset =
          CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), Alignment);
      CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
                                        Offset, VT.getSimpleVT(),
                                        CCValAssign::Full));
    }
  }

  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();

  SDValue FINode;
  if (IsVarArg && NumBytes) {
    // For non-fixed arguments, next emit stores to store the argument values
    // to the stack buffer at the offsets computed above.
    int FI = MF.getFrameInfo().CreateStackObject(NumBytes,
                                                 Layout.getStackAlignment(),
                                                 /*isSS=*/false);
    unsigned ValNo = 0;
    SmallVector<SDValue, 8> Chains;
    for (SDValue Arg : drop_begin(OutVals, NumFixedArgs)) {
      assert(ArgLocs[ValNo].getValNo() == ValNo &&
             "ArgLocs should remain in order and only hold varargs args");
      unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
      FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
      SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
                                DAG.getConstant(Offset, DL, PtrVT));
      Chains.push_back(
          DAG.getStore(Chain, DL, Arg, Add,
                       MachinePointerInfo::getFixedStack(MF, FI, Offset)));
    }
    if (!Chains.empty())
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  } else if (IsVarArg) {
    FINode = DAG.getIntPtrConstant(0, DL);
  }

  if (Callee->getOpcode() == ISD::GlobalAddress) {
    // If the callee is a GlobalAddress node (quite common, since every direct
    // call is), turn it into a TargetGlobalAddress node so that
    // LowerGlobalAddress doesn't add MO_GOT, which is not needed for direct
    // calls.
    GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Callee);
    Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
                                        getPointerTy(DAG.getDataLayout()),
                                        GA->getOffset());
    Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
                         getPointerTy(DAG.getDataLayout()), Callee);
  }

  // Compute the operands for the CALLn node.
  SmallVector<SDValue, 16> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
  // isn't reliable.
  Ops.append(OutVals.begin(),
             IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
  // Add a pointer to the vararg buffer.
  if (IsVarArg)
    Ops.push_back(FINode);

  SmallVector<EVT, 8> InTys;
  for (const auto &In : Ins) {
    assert(!In.Flags.isByVal() && "byval is not valid for return values");
    assert(!In.Flags.isNest() && "nest is not valid for return values");
    if (In.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
    if (In.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
    if (In.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG,
           "WebAssembly hasn't implemented cons regs last return values");
    // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
    // registers.
    InTys.push_back(In.VT);
  }

  // Lastly, if this is a call to a funcref, we need to add a table.set
  // instruction to the chain and transform the call.
  if (CLI.CB &&
      WebAssembly::isFuncrefType(CLI.CB->getCalledOperand()->getType())) {
    // In the absence of the function references proposal, where a funcref call
    // would be lowered to call_ref, we use reference types to generate a
    // table.set that installs the funcref in a special table used solely for
    // this purpose, followed by a call_indirect. Here we just generate the
    // table.set and use its SDValue as the new chain so that LowerCall can
    // finalize the lowering by generating the call_indirect.
    SDValue Chain = Ops[0];

    MCSymbolWasm *Table = WebAssembly::getOrCreateFuncrefCallTableSymbol(
        MF.getContext(), Subtarget);
    SDValue Sym = DAG.getMCSymbol(Table, PtrVT);
    SDValue TableSlot = DAG.getConstant(0, DL, MVT::i32);
    SDValue TableSetOps[] = {Chain, Sym, TableSlot, Callee};
    SDValue TableSet = DAG.getMemIntrinsicNode(
        WebAssemblyISD::TABLE_SET, DL, DAG.getVTList(MVT::Other), TableSetOps,
        MVT::funcref,
        // Machine Mem Operand args
        MachinePointerInfo(
            WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF),
        CLI.CB->getCalledOperand()->getPointerAlignment(DAG.getDataLayout()),
        MachineMemOperand::MOStore);

    Ops[0] = TableSet; // The new chain is the TableSet itself
  }

  if (CLI.IsTailCall) {
    // ret_calls do not return values to the current frame
    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
    return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
  }

  InTys.push_back(MVT::Other);
  SDVTList InTyList = DAG.getVTList(InTys);
  SDValue Res = DAG.getNode(WebAssemblyISD::CALL, DL, InTyList, Ops);

  for (size_t I = 0; I < Ins.size(); ++I)
    InVals.push_back(Res.getValue(I));

  // Return the chain
  return Res.getValue(Ins.size());
}

bool WebAssemblyTargetLowering::CanLowerReturn(
    CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    LLVMContext & /*Context*/) const {
  // WebAssembly can only handle returning tuples with multivalue enabled
  return Subtarget->hasMultivalue() || Outs.size() <= 1;
}

SDValue WebAssemblyTargetLowering::LowerReturn(
    SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
    SelectionDAG &DAG) const {
  assert((Subtarget->hasMultivalue() || Outs.size() <= 1) &&
         "MVP WebAssembly can only return up to one value");
  if (!callingConvSupported(CallConv))
    fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");

  SmallVector<SDValue, 4> RetOps(1, Chain);
  RetOps.append(OutVals.begin(), OutVals.end());
  Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);

  // Record the number and types of the return values.
  for (const ISD::OutputArg &Out : Outs) {
    assert(!Out.Flags.isByVal() && "byval is not valid for return values");
    assert(!Out.Flags.isNest() && "nest is not valid for return values");
    assert(Out.IsFixed && "non-fixed return value is not valid");
    if (Out.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
    if (Out.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
    if (Out.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
  }

  return Chain;
}

SDValue WebAssemblyTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  if (!callingConvSupported(CallConv))
    fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");

  MachineFunction &MF = DAG.getMachineFunction();
  auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();

  // Set up the incoming ARGUMENTS value, which serves to represent the liveness
  // of the incoming values before they're represented by virtual registers.
  MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);

  bool HasSwiftErrorArg = false;
  bool HasSwiftSelfArg = false;
  for (const ISD::InputArg &In : Ins) {
    HasSwiftSelfArg |= In.Flags.isSwiftSelf();
    HasSwiftErrorArg |= In.Flags.isSwiftError();
    if (In.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
    if (In.Flags.isNest())
      fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
    if (In.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
    if (In.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
    // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
    // registers.
    InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
                                           DAG.getTargetConstant(InVals.size(),
                                                                 DL, MVT::i32))
                             : DAG.getUNDEF(In.VT));

    // Record the number and types of arguments.
    MFI->addParam(In.VT);
  }

  // For swiftcc, emit additional swiftself and swifterror arguments if they
  // are not already present. These additional arguments are also added to the
  // callee's signature; they are necessary to match caller and callee
  // signatures for indirect calls.
  auto PtrVT = getPointerTy(MF.getDataLayout());
  if (CallConv == CallingConv::Swift) {
    if (!HasSwiftSelfArg) {
      MFI->addParam(PtrVT);
    }
    if (!HasSwiftErrorArg) {
      MFI->addParam(PtrVT);
    }
  }
  // Varargs are copied into a buffer allocated by the caller, and a pointer to
  // the buffer is passed as an argument.
  if (IsVarArg) {
    MVT PtrVT = getPointerTy(MF.getDataLayout());
    Register VarargVreg =
        MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrVT));
    MFI->setVarargBufferVreg(VarargVreg);
    Chain = DAG.getCopyToReg(
        Chain, DL, VarargVreg,
        DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
                    DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
    MFI->addParam(PtrVT);
  }

  // Record the number and types of arguments and results.
1317 SmallVector<MVT, 4> Params; 1318 SmallVector<MVT, 4> Results; 1319 computeSignatureVTs(MF.getFunction().getFunctionType(), &MF.getFunction(), 1320 MF.getFunction(), DAG.getTarget(), Params, Results); 1321 for (MVT VT : Results) 1322 MFI->addResult(VT); 1323 // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify 1324 // the param logic here with ComputeSignatureVTs 1325 assert(MFI->getParams().size() == Params.size() && 1326 std::equal(MFI->getParams().begin(), MFI->getParams().end(), 1327 Params.begin())); 1328 1329 return Chain; 1330 } 1331 1332 void WebAssemblyTargetLowering::ReplaceNodeResults( 1333 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { 1334 switch (N->getOpcode()) { 1335 case ISD::SIGN_EXTEND_INREG: 1336 // Do not add any results, signifying that N should not be custom lowered 1337 // after all. This happens because simd128 turns on custom lowering for 1338 // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an 1339 // illegal type. 1340 break; 1341 default: 1342 llvm_unreachable( 1343 "ReplaceNodeResults not implemented for this op for WebAssembly!"); 1344 } 1345 } 1346 1347 //===----------------------------------------------------------------------===// 1348 // Custom lowering hooks. 1349 //===----------------------------------------------------------------------===// 1350 1351 SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op, 1352 SelectionDAG &DAG) const { 1353 SDLoc DL(Op); 1354 switch (Op.getOpcode()) { 1355 default: 1356 llvm_unreachable("unimplemented operation lowering"); 1357 return SDValue(); 1358 case ISD::FrameIndex: 1359 return LowerFrameIndex(Op, DAG); 1360 case ISD::GlobalAddress: 1361 return LowerGlobalAddress(Op, DAG); 1362 case ISD::GlobalTLSAddress: 1363 return LowerGlobalTLSAddress(Op, DAG); 1364 case ISD::ExternalSymbol: 1365 return LowerExternalSymbol(Op, DAG); 1366 case ISD::JumpTable: 1367 return LowerJumpTable(Op, DAG); 1368 case ISD::BR_JT: 1369 return LowerBR_JT(Op, DAG); 1370 case ISD::VASTART: 1371 return LowerVASTART(Op, DAG); 1372 case ISD::BlockAddress: 1373 case ISD::BRIND: 1374 fail(DL, DAG, "WebAssembly hasn't implemented computed gotos"); 1375 return SDValue(); 1376 case ISD::RETURNADDR: 1377 return LowerRETURNADDR(Op, DAG); 1378 case ISD::FRAMEADDR: 1379 return LowerFRAMEADDR(Op, DAG); 1380 case ISD::CopyToReg: 1381 return LowerCopyToReg(Op, DAG); 1382 case ISD::EXTRACT_VECTOR_ELT: 1383 case ISD::INSERT_VECTOR_ELT: 1384 return LowerAccessVectorElement(Op, DAG); 1385 case ISD::INTRINSIC_VOID: 1386 case ISD::INTRINSIC_WO_CHAIN: 1387 case ISD::INTRINSIC_W_CHAIN: 1388 return LowerIntrinsic(Op, DAG); 1389 case ISD::SIGN_EXTEND_INREG: 1390 return LowerSIGN_EXTEND_INREG(Op, DAG); 1391 case ISD::BUILD_VECTOR: 1392 return LowerBUILD_VECTOR(Op, DAG); 1393 case ISD::VECTOR_SHUFFLE: 1394 return LowerVECTOR_SHUFFLE(Op, DAG); 1395 case ISD::SETCC: 1396 return LowerSETCC(Op, DAG); 1397 case ISD::SHL: 1398 case ISD::SRA: 1399 case ISD::SRL: 1400 return LowerShift(Op, DAG); 1401 case ISD::FP_TO_SINT_SAT: 1402 case ISD::FP_TO_UINT_SAT: 1403 return LowerFP_TO_INT_SAT(Op, DAG); 1404 case ISD::LOAD: 1405 return LowerLoad(Op, DAG); 1406 case ISD::STORE: 1407 return LowerStore(Op, DAG); 1408 } 1409 } 1410 1411 static bool IsWebAssemblyGlobal(SDValue Op) { 1412 if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) 1413 return WebAssembly::isWasmVarAddressSpace(GA->getAddressSpace()); 1414 1415 return false; 1416 } 1417 1418 static Optional<unsigned> 
IsWebAssemblyLocal(SDValue Op, SelectionDAG &DAG) {
  const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op);
  if (!FI)
    return None;

  auto &MF = DAG.getMachineFunction();
  return WebAssemblyFrameLowering::getLocalForStackObject(MF, FI->getIndex());
}

static bool IsWebAssemblyTable(SDValue Op) {
  const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
  if (GA && WebAssembly::isWasmVarAddressSpace(GA->getAddressSpace())) {
    const GlobalValue *Value = GA->getGlobal();
    const Type *Ty = Value->getValueType();

    if (Ty->isArrayTy() && WebAssembly::isRefType(Ty->getArrayElementType()))
      return true;
  }
  return false;
}

// This function will accept as Op any access to a table, so Op can
// be the actual table or an offset into the table.
static bool IsWebAssemblyTableWithOffset(SDValue Op) {
  if (Op->getOpcode() == ISD::ADD && Op->getNumOperands() == 2)
    return (Op->getOperand(1).getSimpleValueType() == MVT::i32 &&
            IsWebAssemblyTableWithOffset(Op->getOperand(0))) ||
           (Op->getOperand(0).getSimpleValueType() == MVT::i32 &&
            IsWebAssemblyTableWithOffset(Op->getOperand(1)));

  return IsWebAssemblyTable(Op);
}

// Helper for table pattern matching used in LowerStore and LowerLoad
bool WebAssemblyTargetLowering::MatchTableForLowering(SelectionDAG &DAG,
                                                      const SDLoc &DL,
                                                      const SDValue &Base,
                                                      GlobalAddressSDNode *&GA,
                                                      SDValue &Idx) const {
  // We expect the following graph for a load of the form:
  // table[<var> + <constant offset>]
  //
  // Case 1:
  // externref = load t1
  // t1: i32 = add t2, i32:<constant offset>
  // t2: i32 = add tX, table
  //
  // This is in some cases simplified to just:
  // Case 2:
  // externref = load t1
  // t1: i32 = add t2, i32:tX
  //
  // So, unfortunately, we need to check for both cases. If we are in the
  // first case, we extract the table GlobalAddressSDNode and build a new
  // node tY: i32 = add i32:<constant offset>, i32:tX
  //
  if (IsWebAssemblyTable(Base)) {
    GA = cast<GlobalAddressSDNode>(Base);
    Idx = DAG.getConstant(0, DL, MVT::i32);
  } else {
    GA = dyn_cast<GlobalAddressSDNode>(Base->getOperand(0));
    if (GA) {
      // We are in Case 2 above.
      Idx = Base->getOperand(1);
      if (!Idx || GA->getNumValues() != 1 || Idx->getNumValues() != 1)
        return false;
    } else {
      // This might be Case 1 above (or an error)
      SDValue V = Base->getOperand(0);
      GA = dyn_cast<GlobalAddressSDNode>(V->getOperand(1));

      if (V->getOpcode() != ISD::ADD || V->getNumOperands() != 2 || !GA)
        return false;

      SDValue IdxV = DAG.getNode(ISD::ADD, DL, MVT::i32, Base->getOperand(1),
                                 V->getOperand(0));
      Idx = IdxV;
    }
  }

  return true;
}

SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
  const SDValue &Value = SN->getValue();
  const SDValue &Base = SN->getBasePtr();
  const SDValue &Offset = SN->getOffset();

  if (IsWebAssemblyTableWithOffset(Base)) {
    if (!Offset->isUndef())
      report_fatal_error(
          "unexpected offset when storing to webassembly table", false);

    SDValue Idx;
    GlobalAddressSDNode *GA;

    if (!MatchTableForLowering(DAG, DL, Base, GA, Idx))
      report_fatal_error("failed pattern matching for lowering table store",
                         false);

    SDVTList Tys = DAG.getVTList(MVT::Other);
    SDValue TableSetOps[] = {SN->getChain(), SDValue(GA, 0), Idx, Value};
    SDValue TableSet =
        DAG.getMemIntrinsicNode(WebAssemblyISD::TABLE_SET, DL, Tys, TableSetOps,
                                SN->getMemoryVT(), SN->getMemOperand());
    return TableSet;
  }

  if (IsWebAssemblyGlobal(Base)) {
    if (!Offset->isUndef())
      report_fatal_error("unexpected offset when storing to webassembly global",
                         false);

    SDVTList Tys = DAG.getVTList(MVT::Other);
    SDValue Ops[] = {SN->getChain(), Value, Base};
    return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_SET, DL, Tys, Ops,
                                   SN->getMemoryVT(), SN->getMemOperand());
  }

  if (Optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
    if (!Offset->isUndef())
      report_fatal_error("unexpected offset when storing to webassembly local",
                         false);

    SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
    SDVTList Tys = DAG.getVTList(MVT::Other); // The chain.
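    // LOCAL_SET consumes the local index and the value to store and produces
    // only a chain result, which is why Tys contains just MVT::Other.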
1547 SDValue Ops[] = {SN->getChain(), Idx, Value}; 1548 return DAG.getNode(WebAssemblyISD::LOCAL_SET, DL, Tys, Ops); 1549 } 1550 1551 return Op; 1552 } 1553 1554 SDValue WebAssemblyTargetLowering::LowerLoad(SDValue Op, 1555 SelectionDAG &DAG) const { 1556 SDLoc DL(Op); 1557 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode()); 1558 const SDValue &Base = LN->getBasePtr(); 1559 const SDValue &Offset = LN->getOffset(); 1560 1561 if (IsWebAssemblyTableWithOffset(Base)) { 1562 if (!Offset->isUndef()) 1563 report_fatal_error( 1564 "unexpected offset when loading from webassembly table", false); 1565 1566 GlobalAddressSDNode *GA; 1567 SDValue Idx; 1568 1569 if (!MatchTableForLowering(DAG, DL, Base, GA, Idx)) 1570 report_fatal_error("failed pattern matching for lowering table load", 1571 false); 1572 1573 SDVTList Tys = DAG.getVTList(LN->getValueType(0), MVT::Other); 1574 SDValue TableGetOps[] = {LN->getChain(), SDValue(GA, 0), Idx}; 1575 SDValue TableGet = 1576 DAG.getMemIntrinsicNode(WebAssemblyISD::TABLE_GET, DL, Tys, TableGetOps, 1577 LN->getMemoryVT(), LN->getMemOperand()); 1578 return TableGet; 1579 } 1580 1581 if (IsWebAssemblyGlobal(Base)) { 1582 if (!Offset->isUndef()) 1583 report_fatal_error( 1584 "unexpected offset when loading from webassembly global", false); 1585 1586 SDVTList Tys = DAG.getVTList(LN->getValueType(0), MVT::Other); 1587 SDValue Ops[] = {LN->getChain(), Base}; 1588 return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_GET, DL, Tys, Ops, 1589 LN->getMemoryVT(), LN->getMemOperand()); 1590 } 1591 1592 if (Optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) { 1593 if (!Offset->isUndef()) 1594 report_fatal_error( 1595 "unexpected offset when loading from webassembly local", false); 1596 1597 SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32); 1598 EVT LocalVT = LN->getValueType(0); 1599 SDValue LocalGet = DAG.getNode(WebAssemblyISD::LOCAL_GET, DL, LocalVT, 1600 {LN->getChain(), Idx}); 1601 SDValue Result = DAG.getMergeValues({LocalGet, LN->getChain()}, DL); 1602 assert(Result->getNumValues() == 2 && "Loads must carry a chain!"); 1603 return Result; 1604 } 1605 1606 return Op; 1607 } 1608 1609 SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op, 1610 SelectionDAG &DAG) const { 1611 SDValue Src = Op.getOperand(2); 1612 if (isa<FrameIndexSDNode>(Src.getNode())) { 1613 // CopyToReg nodes don't support FrameIndex operands. Other targets select 1614 // the FI to some LEA-like instruction, but since we don't have that, we 1615 // need to insert some kind of instruction that can take an FI operand and 1616 // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy 1617 // local.copy between Op and its FI operand. 1618 SDValue Chain = Op.getOperand(0); 1619 SDLoc DL(Op); 1620 unsigned Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg(); 1621 EVT VT = Src.getValueType(); 1622 SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32 1623 : WebAssembly::COPY_I64, 1624 DL, VT, Src), 1625 0); 1626 return Op.getNode()->getNumValues() == 1 1627 ? DAG.getCopyToReg(Chain, DL, Reg, Copy) 1628 : DAG.getCopyToReg(Chain, DL, Reg, Copy, 1629 Op.getNumOperands() == 4 ? 
Op.getOperand(3) 1630 : SDValue()); 1631 } 1632 return SDValue(); 1633 } 1634 1635 SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op, 1636 SelectionDAG &DAG) const { 1637 int FI = cast<FrameIndexSDNode>(Op)->getIndex(); 1638 return DAG.getTargetFrameIndex(FI, Op.getValueType()); 1639 } 1640 1641 SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op, 1642 SelectionDAG &DAG) const { 1643 SDLoc DL(Op); 1644 1645 if (!Subtarget->getTargetTriple().isOSEmscripten()) { 1646 fail(DL, DAG, 1647 "Non-Emscripten WebAssembly hasn't implemented " 1648 "__builtin_return_address"); 1649 return SDValue(); 1650 } 1651 1652 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 1653 return SDValue(); 1654 1655 unsigned Depth = Op.getConstantOperandVal(0); 1656 MakeLibCallOptions CallOptions; 1657 return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(), 1658 {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL) 1659 .first; 1660 } 1661 1662 SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op, 1663 SelectionDAG &DAG) const { 1664 // Non-zero depths are not supported by WebAssembly currently. Use the 1665 // legalizer's default expansion, which is to return 0 (what this function is 1666 // documented to do). 1667 if (Op.getConstantOperandVal(0) > 0) 1668 return SDValue(); 1669 1670 DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true); 1671 EVT VT = Op.getValueType(); 1672 Register FP = 1673 Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction()); 1674 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT); 1675 } 1676 1677 SDValue 1678 WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op, 1679 SelectionDAG &DAG) const { 1680 SDLoc DL(Op); 1681 const auto *GA = cast<GlobalAddressSDNode>(Op); 1682 1683 MachineFunction &MF = DAG.getMachineFunction(); 1684 if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory()) 1685 report_fatal_error("cannot use thread-local storage without bulk memory", 1686 false); 1687 1688 const GlobalValue *GV = GA->getGlobal(); 1689 1690 // Currently Emscripten does not support dynamic linking with threads. 1691 // Therefore, if we have thread-local storage, only the local-exec model 1692 // is possible. 1693 // TODO: remove this and implement proper TLS models once Emscripten 1694 // supports dynamic linking with threads. 1695 if (GV->getThreadLocalMode() != GlobalValue::LocalExecTLSModel && 1696 !Subtarget->getTargetTriple().isOSEmscripten()) { 1697 report_fatal_error("only -ftls-model=local-exec is supported for now on " 1698 "non-Emscripten OSes: variable " + 1699 GV->getName(), 1700 false); 1701 } 1702 1703 auto model = GV->getThreadLocalMode(); 1704 1705 // Unsupported TLS modes 1706 assert(model != GlobalValue::NotThreadLocal); 1707 assert(model != GlobalValue::InitialExecTLSModel); 1708 1709 if (model == GlobalValue::LocalExecTLSModel || 1710 model == GlobalValue::LocalDynamicTLSModel || 1711 (model == GlobalValue::GeneralDynamicTLSModel && 1712 getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))) { 1713 // For DSO-local TLS variables we use offset from __tls_base 1714 1715 MVT PtrVT = getPointerTy(DAG.getDataLayout()); 1716 auto GlobalGet = PtrVT == MVT::i64 ? 
WebAssembly::GLOBAL_GET_I64 1717 : WebAssembly::GLOBAL_GET_I32; 1718 const char *BaseName = MF.createExternalSymbolName("__tls_base"); 1719 1720 SDValue BaseAddr( 1721 DAG.getMachineNode(GlobalGet, DL, PtrVT, 1722 DAG.getTargetExternalSymbol(BaseName, PtrVT)), 1723 0); 1724 1725 SDValue TLSOffset = DAG.getTargetGlobalAddress( 1726 GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL); 1727 SDValue SymOffset = 1728 DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, TLSOffset); 1729 1730 return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymOffset); 1731 } 1732 1733 assert(model == GlobalValue::GeneralDynamicTLSModel); 1734 1735 EVT VT = Op.getValueType(); 1736 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT, 1737 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, 1738 GA->getOffset(), 1739 WebAssemblyII::MO_GOT_TLS)); 1740 } 1741 1742 SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op, 1743 SelectionDAG &DAG) const { 1744 SDLoc DL(Op); 1745 const auto *GA = cast<GlobalAddressSDNode>(Op); 1746 EVT VT = Op.getValueType(); 1747 assert(GA->getTargetFlags() == 0 && 1748 "Unexpected target flags on generic GlobalAddressSDNode"); 1749 if (!WebAssembly::isValidAddressSpace(GA->getAddressSpace())) 1750 fail(DL, DAG, "Invalid address space for WebAssembly target"); 1751 1752 unsigned OperandFlags = 0; 1753 if (isPositionIndependent()) { 1754 const GlobalValue *GV = GA->getGlobal(); 1755 if (getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) { 1756 MachineFunction &MF = DAG.getMachineFunction(); 1757 MVT PtrVT = getPointerTy(MF.getDataLayout()); 1758 const char *BaseName; 1759 if (GV->getValueType()->isFunctionTy()) { 1760 BaseName = MF.createExternalSymbolName("__table_base"); 1761 OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL; 1762 } 1763 else { 1764 BaseName = MF.createExternalSymbolName("__memory_base"); 1765 OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL; 1766 } 1767 SDValue BaseAddr = 1768 DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, 1769 DAG.getTargetExternalSymbol(BaseName, PtrVT)); 1770 1771 SDValue SymAddr = DAG.getNode( 1772 WebAssemblyISD::WrapperREL, DL, VT, 1773 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(), 1774 OperandFlags)); 1775 1776 return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr); 1777 } 1778 OperandFlags = WebAssemblyII::MO_GOT; 1779 } 1780 1781 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT, 1782 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, 1783 GA->getOffset(), OperandFlags)); 1784 } 1785 1786 SDValue 1787 WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op, 1788 SelectionDAG &DAG) const { 1789 SDLoc DL(Op); 1790 const auto *ES = cast<ExternalSymbolSDNode>(Op); 1791 EVT VT = Op.getValueType(); 1792 assert(ES->getTargetFlags() == 0 && 1793 "Unexpected target flags on generic ExternalSymbolSDNode"); 1794 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT, 1795 DAG.getTargetExternalSymbol(ES->getSymbol(), VT)); 1796 } 1797 1798 SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op, 1799 SelectionDAG &DAG) const { 1800 // There's no need for a Wrapper node because we always incorporate a jump 1801 // table operand into a BR_TABLE instruction, rather than ever 1802 // materializing it in a register. 
1803 const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 1804 return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(), 1805 JT->getTargetFlags()); 1806 } 1807 1808 SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op, 1809 SelectionDAG &DAG) const { 1810 SDLoc DL(Op); 1811 SDValue Chain = Op.getOperand(0); 1812 const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1)); 1813 SDValue Index = Op.getOperand(2); 1814 assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags"); 1815 1816 SmallVector<SDValue, 8> Ops; 1817 Ops.push_back(Chain); 1818 Ops.push_back(Index); 1819 1820 MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo(); 1821 const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs; 1822 1823 // Add an operand for each case. 1824 for (auto MBB : MBBs) 1825 Ops.push_back(DAG.getBasicBlock(MBB)); 1826 1827 // Add the first MBB as a dummy default target for now. This will be replaced 1828 // with the proper default target (and the preceding range check eliminated) 1829 // if possible by WebAssemblyFixBrTableDefaults. 1830 Ops.push_back(DAG.getBasicBlock(*MBBs.begin())); 1831 return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops); 1832 } 1833 1834 SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op, 1835 SelectionDAG &DAG) const { 1836 SDLoc DL(Op); 1837 EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout()); 1838 1839 auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>(); 1840 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 1841 1842 SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL, 1843 MFI->getVarargBufferVreg(), PtrVT); 1844 return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1), 1845 MachinePointerInfo(SV)); 1846 } 1847 1848 SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op, 1849 SelectionDAG &DAG) const { 1850 MachineFunction &MF = DAG.getMachineFunction(); 1851 unsigned IntNo; 1852 switch (Op.getOpcode()) { 1853 case ISD::INTRINSIC_VOID: 1854 case ISD::INTRINSIC_W_CHAIN: 1855 IntNo = Op.getConstantOperandVal(1); 1856 break; 1857 case ISD::INTRINSIC_WO_CHAIN: 1858 IntNo = Op.getConstantOperandVal(0); 1859 break; 1860 default: 1861 llvm_unreachable("Invalid intrinsic"); 1862 } 1863 SDLoc DL(Op); 1864 1865 switch (IntNo) { 1866 default: 1867 return SDValue(); // Don't custom lower most intrinsics. 
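  // wasm.lsda returns the address of this function's LSDA (the
  // "GCC_except_table<N>" symbol used for exception handling); under PIC it
  // is computed relative to __memory_base.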
1868 1869 case Intrinsic::wasm_lsda: { 1870 auto PtrVT = getPointerTy(MF.getDataLayout()); 1871 const char *SymName = MF.createExternalSymbolName( 1872 "GCC_except_table" + std::to_string(MF.getFunctionNumber())); 1873 if (isPositionIndependent()) { 1874 SDValue Node = DAG.getTargetExternalSymbol( 1875 SymName, PtrVT, WebAssemblyII::MO_MEMORY_BASE_REL); 1876 const char *BaseName = MF.createExternalSymbolName("__memory_base"); 1877 SDValue BaseAddr = 1878 DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, 1879 DAG.getTargetExternalSymbol(BaseName, PtrVT)); 1880 SDValue SymAddr = 1881 DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, Node); 1882 return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymAddr); 1883 } 1884 SDValue Node = DAG.getTargetExternalSymbol(SymName, PtrVT); 1885 return DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, Node); 1886 } 1887 1888 case Intrinsic::wasm_shuffle: { 1889 // Drop in-chain and replace undefs, but otherwise pass through unchanged 1890 SDValue Ops[18]; 1891 size_t OpIdx = 0; 1892 Ops[OpIdx++] = Op.getOperand(1); 1893 Ops[OpIdx++] = Op.getOperand(2); 1894 while (OpIdx < 18) { 1895 const SDValue &MaskIdx = Op.getOperand(OpIdx + 1); 1896 if (MaskIdx.isUndef() || 1897 cast<ConstantSDNode>(MaskIdx.getNode())->getZExtValue() >= 32) { 1898 Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32); 1899 } else { 1900 Ops[OpIdx++] = MaskIdx; 1901 } 1902 } 1903 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops); 1904 } 1905 } 1906 } 1907 1908 SDValue 1909 WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, 1910 SelectionDAG &DAG) const { 1911 SDLoc DL(Op); 1912 // If sign extension operations are disabled, allow sext_inreg only if operand 1913 // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign 1914 // extension operations, but allowing sext_inreg in this context lets us have 1915 // simple patterns to select extract_lane_s instructions. Expanding sext_inreg 1916 // everywhere would be simpler in this file, but would necessitate large and 1917 // brittle patterns to undo the expansion and select extract_lane_s 1918 // instructions. 
1919 assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128()); 1920 if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT) 1921 return SDValue(); 1922 1923 const SDValue &Extract = Op.getOperand(0); 1924 MVT VecT = Extract.getOperand(0).getSimpleValueType(); 1925 if (VecT.getVectorElementType().getSizeInBits() > 32) 1926 return SDValue(); 1927 MVT ExtractedLaneT = 1928 cast<VTSDNode>(Op.getOperand(1).getNode())->getVT().getSimpleVT(); 1929 MVT ExtractedVecT = 1930 MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits()); 1931 if (ExtractedVecT == VecT) 1932 return Op; 1933 1934 // Bitcast vector to appropriate type to ensure ISel pattern coverage 1935 const SDNode *Index = Extract.getOperand(1).getNode(); 1936 if (!isa<ConstantSDNode>(Index)) 1937 return SDValue(); 1938 unsigned IndexVal = cast<ConstantSDNode>(Index)->getZExtValue(); 1939 unsigned Scale = 1940 ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements(); 1941 assert(Scale > 1); 1942 SDValue NewIndex = 1943 DAG.getConstant(IndexVal * Scale, DL, Index->getValueType(0)); 1944 SDValue NewExtract = DAG.getNode( 1945 ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(), 1946 DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex); 1947 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract, 1948 Op.getOperand(1)); 1949 } 1950 1951 static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) { 1952 SDLoc DL(Op); 1953 if (Op.getValueType() != MVT::v2f64) 1954 return SDValue(); 1955 1956 auto GetConvertedLane = [](SDValue Op, unsigned &Opcode, SDValue &SrcVec, 1957 unsigned &Index) -> bool { 1958 switch (Op.getOpcode()) { 1959 case ISD::SINT_TO_FP: 1960 Opcode = WebAssemblyISD::CONVERT_LOW_S; 1961 break; 1962 case ISD::UINT_TO_FP: 1963 Opcode = WebAssemblyISD::CONVERT_LOW_U; 1964 break; 1965 case ISD::FP_EXTEND: 1966 Opcode = WebAssemblyISD::PROMOTE_LOW; 1967 break; 1968 default: 1969 return false; 1970 } 1971 1972 auto ExtractVector = Op.getOperand(0); 1973 if (ExtractVector.getOpcode() != ISD::EXTRACT_VECTOR_ELT) 1974 return false; 1975 1976 if (!isa<ConstantSDNode>(ExtractVector.getOperand(1).getNode())) 1977 return false; 1978 1979 SrcVec = ExtractVector.getOperand(0); 1980 Index = ExtractVector.getConstantOperandVal(1); 1981 return true; 1982 }; 1983 1984 unsigned LHSOpcode, RHSOpcode, LHSIndex, RHSIndex; 1985 SDValue LHSSrcVec, RHSSrcVec; 1986 if (!GetConvertedLane(Op.getOperand(0), LHSOpcode, LHSSrcVec, LHSIndex) || 1987 !GetConvertedLane(Op.getOperand(1), RHSOpcode, RHSSrcVec, RHSIndex)) 1988 return SDValue(); 1989 1990 if (LHSOpcode != RHSOpcode) 1991 return SDValue(); 1992 1993 MVT ExpectedSrcVT; 1994 switch (LHSOpcode) { 1995 case WebAssemblyISD::CONVERT_LOW_S: 1996 case WebAssemblyISD::CONVERT_LOW_U: 1997 ExpectedSrcVT = MVT::v4i32; 1998 break; 1999 case WebAssemblyISD::PROMOTE_LOW: 2000 ExpectedSrcVT = MVT::v4f32; 2001 break; 2002 } 2003 if (LHSSrcVec.getValueType() != ExpectedSrcVT) 2004 return SDValue(); 2005 2006 auto Src = LHSSrcVec; 2007 if (LHSIndex != 0 || RHSIndex != 1 || LHSSrcVec != RHSSrcVec) { 2008 // Shuffle the source vector so that the converted lanes are the low lanes. 
2009 Src = DAG.getVectorShuffle( 2010 ExpectedSrcVT, DL, LHSSrcVec, RHSSrcVec, 2011 {static_cast<int>(LHSIndex), static_cast<int>(RHSIndex) + 4, -1, -1}); 2012 } 2013 return DAG.getNode(LHSOpcode, DL, MVT::v2f64, Src); 2014 } 2015 2016 SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op, 2017 SelectionDAG &DAG) const { 2018 if (auto ConvertLow = LowerConvertLow(Op, DAG)) 2019 return ConvertLow; 2020 2021 SDLoc DL(Op); 2022 const EVT VecT = Op.getValueType(); 2023 const EVT LaneT = Op.getOperand(0).getValueType(); 2024 const size_t Lanes = Op.getNumOperands(); 2025 bool CanSwizzle = VecT == MVT::v16i8; 2026 2027 // BUILD_VECTORs are lowered to the instruction that initializes the highest 2028 // possible number of lanes at once followed by a sequence of replace_lane 2029 // instructions to individually initialize any remaining lanes. 2030 2031 // TODO: Tune this. For example, lanewise swizzling is very expensive, so 2032 // swizzled lanes should be given greater weight. 2033 2034 // TODO: Investigate looping rather than always extracting/replacing specific 2035 // lanes to fill gaps. 2036 2037 auto IsConstant = [](const SDValue &V) { 2038 return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP; 2039 }; 2040 2041 // Returns the source vector and index vector pair if they exist. Checks for: 2042 // (extract_vector_elt 2043 // $src, 2044 // (sign_extend_inreg (extract_vector_elt $indices, $i)) 2045 // ) 2046 auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) { 2047 auto Bail = std::make_pair(SDValue(), SDValue()); 2048 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT) 2049 return Bail; 2050 const SDValue &SwizzleSrc = Lane->getOperand(0); 2051 const SDValue &IndexExt = Lane->getOperand(1); 2052 if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG) 2053 return Bail; 2054 const SDValue &Index = IndexExt->getOperand(0); 2055 if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT) 2056 return Bail; 2057 const SDValue &SwizzleIndices = Index->getOperand(0); 2058 if (SwizzleSrc.getValueType() != MVT::v16i8 || 2059 SwizzleIndices.getValueType() != MVT::v16i8 || 2060 Index->getOperand(1)->getOpcode() != ISD::Constant || 2061 Index->getConstantOperandVal(1) != I) 2062 return Bail; 2063 return std::make_pair(SwizzleSrc, SwizzleIndices); 2064 }; 2065 2066 // If the lane is extracted from another vector at a constant index, return 2067 // that vector. The source vector must not have more lanes than the dest 2068 // because the shufflevector indices are in terms of the destination lanes and 2069 // would not be able to address the smaller individual source lanes. 
2070 auto GetShuffleSrc = [&](const SDValue &Lane) { 2071 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT) 2072 return SDValue(); 2073 if (!isa<ConstantSDNode>(Lane->getOperand(1).getNode())) 2074 return SDValue(); 2075 if (Lane->getOperand(0).getValueType().getVectorNumElements() > 2076 VecT.getVectorNumElements()) 2077 return SDValue(); 2078 return Lane->getOperand(0); 2079 }; 2080 2081 using ValueEntry = std::pair<SDValue, size_t>; 2082 SmallVector<ValueEntry, 16> SplatValueCounts; 2083 2084 using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>; 2085 SmallVector<SwizzleEntry, 16> SwizzleCounts; 2086 2087 using ShuffleEntry = std::pair<SDValue, size_t>; 2088 SmallVector<ShuffleEntry, 16> ShuffleCounts; 2089 2090 auto AddCount = [](auto &Counts, const auto &Val) { 2091 auto CountIt = 2092 llvm::find_if(Counts, [&Val](auto E) { return E.first == Val; }); 2093 if (CountIt == Counts.end()) { 2094 Counts.emplace_back(Val, 1); 2095 } else { 2096 CountIt->second++; 2097 } 2098 }; 2099 2100 auto GetMostCommon = [](auto &Counts) { 2101 auto CommonIt = 2102 std::max_element(Counts.begin(), Counts.end(), 2103 [](auto A, auto B) { return A.second < B.second; }); 2104 assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector"); 2105 return *CommonIt; 2106 }; 2107 2108 size_t NumConstantLanes = 0; 2109 2110 // Count eligible lanes for each type of vector creation op 2111 for (size_t I = 0; I < Lanes; ++I) { 2112 const SDValue &Lane = Op->getOperand(I); 2113 if (Lane.isUndef()) 2114 continue; 2115 2116 AddCount(SplatValueCounts, Lane); 2117 2118 if (IsConstant(Lane)) 2119 NumConstantLanes++; 2120 if (auto ShuffleSrc = GetShuffleSrc(Lane)) 2121 AddCount(ShuffleCounts, ShuffleSrc); 2122 if (CanSwizzle) { 2123 auto SwizzleSrcs = GetSwizzleSrcs(I, Lane); 2124 if (SwizzleSrcs.first) 2125 AddCount(SwizzleCounts, SwizzleSrcs); 2126 } 2127 } 2128 2129 SDValue SplatValue; 2130 size_t NumSplatLanes; 2131 std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts); 2132 2133 SDValue SwizzleSrc; 2134 SDValue SwizzleIndices; 2135 size_t NumSwizzleLanes = 0; 2136 if (SwizzleCounts.size()) 2137 std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices), 2138 NumSwizzleLanes) = GetMostCommon(SwizzleCounts); 2139 2140 // Shuffles can draw from up to two vectors, so find the two most common 2141 // sources. 
2142 SDValue ShuffleSrc1, ShuffleSrc2; 2143 size_t NumShuffleLanes = 0; 2144 if (ShuffleCounts.size()) { 2145 std::tie(ShuffleSrc1, NumShuffleLanes) = GetMostCommon(ShuffleCounts); 2146 llvm::erase_if(ShuffleCounts, 2147 [&](const auto &Pair) { return Pair.first == ShuffleSrc1; }); 2148 } 2149 if (ShuffleCounts.size()) { 2150 size_t AdditionalShuffleLanes; 2151 std::tie(ShuffleSrc2, AdditionalShuffleLanes) = 2152 GetMostCommon(ShuffleCounts); 2153 NumShuffleLanes += AdditionalShuffleLanes; 2154 } 2155 2156 // Predicate returning true if the lane is properly initialized by the 2157 // original instruction 2158 std::function<bool(size_t, const SDValue &)> IsLaneConstructed; 2159 SDValue Result; 2160 // Prefer swizzles over shuffles over vector consts over splats 2161 if (NumSwizzleLanes >= NumShuffleLanes && 2162 NumSwizzleLanes >= NumConstantLanes && NumSwizzleLanes >= NumSplatLanes) { 2163 Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc, 2164 SwizzleIndices); 2165 auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices); 2166 IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) { 2167 return Swizzled == GetSwizzleSrcs(I, Lane); 2168 }; 2169 } else if (NumShuffleLanes >= NumConstantLanes && 2170 NumShuffleLanes >= NumSplatLanes) { 2171 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits() / 8; 2172 size_t DestLaneCount = VecT.getVectorNumElements(); 2173 size_t Scale1 = 1; 2174 size_t Scale2 = 1; 2175 SDValue Src1 = ShuffleSrc1; 2176 SDValue Src2 = ShuffleSrc2 ? ShuffleSrc2 : DAG.getUNDEF(VecT); 2177 if (Src1.getValueType() != VecT) { 2178 size_t LaneSize = 2179 Src1.getValueType().getVectorElementType().getFixedSizeInBits() / 8; 2180 assert(LaneSize > DestLaneSize); 2181 Scale1 = LaneSize / DestLaneSize; 2182 Src1 = DAG.getBitcast(VecT, Src1); 2183 } 2184 if (Src2.getValueType() != VecT) { 2185 size_t LaneSize = 2186 Src2.getValueType().getVectorElementType().getFixedSizeInBits() / 8; 2187 assert(LaneSize > DestLaneSize); 2188 Scale2 = LaneSize / DestLaneSize; 2189 Src2 = DAG.getBitcast(VecT, Src2); 2190 } 2191 2192 int Mask[16]; 2193 assert(DestLaneCount <= 16); 2194 for (size_t I = 0; I < DestLaneCount; ++I) { 2195 const SDValue &Lane = Op->getOperand(I); 2196 SDValue Src = GetShuffleSrc(Lane); 2197 if (Src == ShuffleSrc1) { 2198 Mask[I] = Lane->getConstantOperandVal(1) * Scale1; 2199 } else if (Src && Src == ShuffleSrc2) { 2200 Mask[I] = DestLaneCount + Lane->getConstantOperandVal(1) * Scale2; 2201 } else { 2202 Mask[I] = -1; 2203 } 2204 } 2205 ArrayRef<int> MaskRef(Mask, DestLaneCount); 2206 Result = DAG.getVectorShuffle(VecT, DL, Src1, Src2, MaskRef); 2207 IsLaneConstructed = [&](size_t, const SDValue &Lane) { 2208 auto Src = GetShuffleSrc(Lane); 2209 return Src == ShuffleSrc1 || (Src && Src == ShuffleSrc2); 2210 }; 2211 } else if (NumConstantLanes >= NumSplatLanes) { 2212 SmallVector<SDValue, 16> ConstLanes; 2213 for (const SDValue &Lane : Op->op_values()) { 2214 if (IsConstant(Lane)) { 2215 // Values may need to be fixed so that they will sign extend to be 2216 // within the expected range during ISel. Check whether the value is in 2217 // bounds based on the lane bit width and if it is out of bounds, lop 2218 // off the extra bits and subtract 2^n to reflect giving the high bit 2219 // value -2^(n-1) rather than +2^(n-1). Skip the i64 case because it 2220 // cannot possibly be out of range. 2221 auto *Const = dyn_cast<ConstantSDNode>(Lane.getNode()); 2222 int64_t Val = Const ? 
Const->getSExtValue() : 0; 2223 uint64_t LaneBits = 128 / Lanes; 2224 assert((LaneBits == 64 || Val >= -(1ll << (LaneBits - 1))) && 2225 "Unexpected out of bounds negative value"); 2226 if (Const && LaneBits != 64 && Val > (1ll << (LaneBits - 1)) - 1) { 2227 auto NewVal = ((uint64_t)Val % (1ll << LaneBits)) - (1ll << LaneBits); 2228 ConstLanes.push_back(DAG.getConstant(NewVal, SDLoc(Lane), LaneT)); 2229 } else { 2230 ConstLanes.push_back(Lane); 2231 } 2232 } else if (LaneT.isFloatingPoint()) { 2233 ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT)); 2234 } else { 2235 ConstLanes.push_back(DAG.getConstant(0, DL, LaneT)); 2236 } 2237 } 2238 Result = DAG.getBuildVector(VecT, DL, ConstLanes); 2239 IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) { 2240 return IsConstant(Lane); 2241 }; 2242 } else { 2243 // Use a splat, but possibly a load_splat 2244 LoadSDNode *SplattedLoad; 2245 if ((SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) && 2246 SplattedLoad->getMemoryVT() == VecT.getVectorElementType()) { 2247 Result = DAG.getMemIntrinsicNode( 2248 WebAssemblyISD::LOAD_SPLAT, DL, DAG.getVTList(VecT), 2249 {SplattedLoad->getChain(), SplattedLoad->getBasePtr(), 2250 SplattedLoad->getOffset()}, 2251 SplattedLoad->getMemoryVT(), SplattedLoad->getMemOperand()); 2252 } else { 2253 Result = DAG.getSplatBuildVector(VecT, DL, SplatValue); 2254 } 2255 IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) { 2256 return Lane == SplatValue; 2257 }; 2258 } 2259 2260 assert(Result); 2261 assert(IsLaneConstructed); 2262 2263 // Add replace_lane instructions for any unhandled values 2264 for (size_t I = 0; I < Lanes; ++I) { 2265 const SDValue &Lane = Op->getOperand(I); 2266 if (!Lane.isUndef() && !IsLaneConstructed(I, Lane)) 2267 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane, 2268 DAG.getConstant(I, DL, MVT::i32)); 2269 } 2270 2271 return Result; 2272 } 2273 2274 SDValue 2275 WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, 2276 SelectionDAG &DAG) const { 2277 SDLoc DL(Op); 2278 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask(); 2279 MVT VecType = Op.getOperand(0).getSimpleValueType(); 2280 assert(VecType.is128BitVector() && "Unexpected shuffle vector type"); 2281 size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8; 2282 2283 // Space for two vector args and sixteen mask indices 2284 SDValue Ops[18]; 2285 size_t OpIdx = 0; 2286 Ops[OpIdx++] = Op.getOperand(0); 2287 Ops[OpIdx++] = Op.getOperand(1); 2288 2289 // Expand mask indices to byte indices and materialize them as operands 2290 for (int M : Mask) { 2291 for (size_t J = 0; J < LaneBytes; ++J) { 2292 // Lower undefs (represented by -1 in mask) to zero 2293 uint64_t ByteIndex = M == -1 ? 0 : (uint64_t)M * LaneBytes + J; 2294 Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32); 2295 } 2296 } 2297 2298 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops); 2299 } 2300 2301 SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op, 2302 SelectionDAG &DAG) const { 2303 SDLoc DL(Op); 2304 // The legalizer does not know how to expand the unsupported comparison modes 2305 // of i64x2 vectors, so we manually unroll them here. 
2306 assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64); 2307 SmallVector<SDValue, 2> LHS, RHS; 2308 DAG.ExtractVectorElements(Op->getOperand(0), LHS); 2309 DAG.ExtractVectorElements(Op->getOperand(1), RHS); 2310 const SDValue &CC = Op->getOperand(2); 2311 auto MakeLane = [&](unsigned I) { 2312 return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I], 2313 DAG.getConstant(uint64_t(-1), DL, MVT::i64), 2314 DAG.getConstant(uint64_t(0), DL, MVT::i64), CC); 2315 }; 2316 return DAG.getBuildVector(Op->getValueType(0), DL, 2317 {MakeLane(0), MakeLane(1)}); 2318 } 2319 2320 SDValue 2321 WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op, 2322 SelectionDAG &DAG) const { 2323 // Allow constant lane indices, expand variable lane indices 2324 SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode(); 2325 if (isa<ConstantSDNode>(IdxNode) || IdxNode->isUndef()) 2326 return Op; 2327 else 2328 // Perform default expansion 2329 return SDValue(); 2330 } 2331 2332 static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) { 2333 EVT LaneT = Op.getSimpleValueType().getVectorElementType(); 2334 // 32-bit and 64-bit unrolled shifts will have proper semantics 2335 if (LaneT.bitsGE(MVT::i32)) 2336 return DAG.UnrollVectorOp(Op.getNode()); 2337 // Otherwise mask the shift value to get proper semantics from 32-bit shift 2338 SDLoc DL(Op); 2339 size_t NumLanes = Op.getSimpleValueType().getVectorNumElements(); 2340 SDValue Mask = DAG.getConstant(LaneT.getSizeInBits() - 1, DL, MVT::i32); 2341 unsigned ShiftOpcode = Op.getOpcode(); 2342 SmallVector<SDValue, 16> ShiftedElements; 2343 DAG.ExtractVectorElements(Op.getOperand(0), ShiftedElements, 0, 0, MVT::i32); 2344 SmallVector<SDValue, 16> ShiftElements; 2345 DAG.ExtractVectorElements(Op.getOperand(1), ShiftElements, 0, 0, MVT::i32); 2346 SmallVector<SDValue, 16> UnrolledOps; 2347 for (size_t i = 0; i < NumLanes; ++i) { 2348 SDValue MaskedShiftValue = 2349 DAG.getNode(ISD::AND, DL, MVT::i32, ShiftElements[i], Mask); 2350 SDValue ShiftedValue = ShiftedElements[i]; 2351 if (ShiftOpcode == ISD::SRA) 2352 ShiftedValue = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, 2353 ShiftedValue, DAG.getValueType(LaneT)); 2354 UnrolledOps.push_back( 2355 DAG.getNode(ShiftOpcode, DL, MVT::i32, ShiftedValue, MaskedShiftValue)); 2356 } 2357 return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps); 2358 } 2359 2360 SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op, 2361 SelectionDAG &DAG) const { 2362 SDLoc DL(Op); 2363 2364 // Only manually lower vector shifts 2365 assert(Op.getSimpleValueType().isVector()); 2366 2367 auto ShiftVal = DAG.getSplatValue(Op.getOperand(1)); 2368 if (!ShiftVal) 2369 return unrollVectorShift(Op, DAG); 2370 2371 // Use anyext because none of the high bits can affect the shift 2372 ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32); 2373 2374 unsigned Opcode; 2375 switch (Op.getOpcode()) { 2376 case ISD::SHL: 2377 Opcode = WebAssemblyISD::VEC_SHL; 2378 break; 2379 case ISD::SRA: 2380 Opcode = WebAssemblyISD::VEC_SHR_S; 2381 break; 2382 case ISD::SRL: 2383 Opcode = WebAssemblyISD::VEC_SHR_U; 2384 break; 2385 default: 2386 llvm_unreachable("unexpected opcode"); 2387 } 2388 2389 return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal); 2390 } 2391 2392 SDValue WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op, 2393 SelectionDAG &DAG) const { 2394 SDLoc DL(Op); 2395 EVT ResT = Op.getValueType(); 2396 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 2397 2398 if ((ResT 
== MVT::i32 || ResT == MVT::i64) && 2399 (SatVT == MVT::i32 || SatVT == MVT::i64)) 2400 return Op; 2401 2402 if (ResT == MVT::v4i32 && SatVT == MVT::i32) 2403 return Op; 2404 2405 return SDValue(); 2406 } 2407 2408 //===----------------------------------------------------------------------===// 2409 // Custom DAG combine hooks 2410 //===----------------------------------------------------------------------===// 2411 static SDValue 2412 performVECTOR_SHUFFLECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { 2413 auto &DAG = DCI.DAG; 2414 auto Shuffle = cast<ShuffleVectorSDNode>(N); 2415 2416 // Hoist vector bitcasts that don't change the number of lanes out of unary 2417 // shuffles, where they are less likely to get in the way of other combines. 2418 // (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) -> 2419 // (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask)))) 2420 SDValue Bitcast = N->getOperand(0); 2421 if (Bitcast.getOpcode() != ISD::BITCAST) 2422 return SDValue(); 2423 if (!N->getOperand(1).isUndef()) 2424 return SDValue(); 2425 SDValue CastOp = Bitcast.getOperand(0); 2426 MVT SrcType = CastOp.getSimpleValueType(); 2427 MVT DstType = Bitcast.getSimpleValueType(); 2428 if (!SrcType.is128BitVector() || 2429 SrcType.getVectorNumElements() != DstType.getVectorNumElements()) 2430 return SDValue(); 2431 SDValue NewShuffle = DAG.getVectorShuffle( 2432 SrcType, SDLoc(N), CastOp, DAG.getUNDEF(SrcType), Shuffle->getMask()); 2433 return DAG.getBitcast(DstType, NewShuffle); 2434 } 2435 2436 static SDValue 2437 performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { 2438 auto &DAG = DCI.DAG; 2439 assert(N->getOpcode() == ISD::SIGN_EXTEND || 2440 N->getOpcode() == ISD::ZERO_EXTEND); 2441 2442 // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if 2443 // possible before the extract_subvector can be expanded. 2444 auto Extract = N->getOperand(0); 2445 if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR) 2446 return SDValue(); 2447 auto Source = Extract.getOperand(0); 2448 auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1)); 2449 if (IndexNode == nullptr) 2450 return SDValue(); 2451 auto Index = IndexNode->getZExtValue(); 2452 2453 // Only v8i8, v4i16, and v2i32 extracts can be widened, and only if the 2454 // extracted subvector is the low or high half of its source. 2455 EVT ResVT = N->getValueType(0); 2456 if (ResVT == MVT::v8i16) { 2457 if (Extract.getValueType() != MVT::v8i8 || 2458 Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8)) 2459 return SDValue(); 2460 } else if (ResVT == MVT::v4i32) { 2461 if (Extract.getValueType() != MVT::v4i16 || 2462 Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4)) 2463 return SDValue(); 2464 } else if (ResVT == MVT::v2i64) { 2465 if (Extract.getValueType() != MVT::v2i32 || 2466 Source.getValueType() != MVT::v4i32 || (Index != 0 && Index != 2)) 2467 return SDValue(); 2468 } else { 2469 return SDValue(); 2470 } 2471 2472 bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND; 2473 bool IsLow = Index == 0; 2474 2475 unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::EXTEND_LOW_S 2476 : WebAssemblyISD::EXTEND_HIGH_S) 2477 : (IsLow ? 
WebAssemblyISD::EXTEND_LOW_U 2478 : WebAssemblyISD::EXTEND_HIGH_U); 2479 2480 return DAG.getNode(Op, SDLoc(N), ResVT, Source); 2481 } 2482 2483 static SDValue 2484 performVectorTruncZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { 2485 auto &DAG = DCI.DAG; 2486 2487 auto GetWasmConversionOp = [](unsigned Op) { 2488 switch (Op) { 2489 case ISD::FP_TO_SINT_SAT: 2490 return WebAssemblyISD::TRUNC_SAT_ZERO_S; 2491 case ISD::FP_TO_UINT_SAT: 2492 return WebAssemblyISD::TRUNC_SAT_ZERO_U; 2493 case ISD::FP_ROUND: 2494 return WebAssemblyISD::DEMOTE_ZERO; 2495 } 2496 llvm_unreachable("unexpected op"); 2497 }; 2498 2499 auto IsZeroSplat = [](SDValue SplatVal) { 2500 auto *Splat = dyn_cast<BuildVectorSDNode>(SplatVal.getNode()); 2501 APInt SplatValue, SplatUndef; 2502 unsigned SplatBitSize; 2503 bool HasAnyUndefs; 2504 return Splat && 2505 Splat->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, 2506 HasAnyUndefs) && 2507 SplatValue == 0; 2508 }; 2509 2510 if (N->getOpcode() == ISD::CONCAT_VECTORS) { 2511 // Combine this: 2512 // 2513 // (concat_vectors (v2i32 (fp_to_{s,u}int_sat $x, 32)), (v2i32 (splat 0))) 2514 // 2515 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x). 2516 // 2517 // Or this: 2518 // 2519 // (concat_vectors (v2f32 (fp_round (v2f64 $x))), (v2f32 (splat 0))) 2520 // 2521 // into (f32x4.demote_zero_f64x2 $x). 2522 EVT ResVT; 2523 EVT ExpectedConversionType; 2524 auto Conversion = N->getOperand(0); 2525 auto ConversionOp = Conversion.getOpcode(); 2526 switch (ConversionOp) { 2527 case ISD::FP_TO_SINT_SAT: 2528 case ISD::FP_TO_UINT_SAT: 2529 ResVT = MVT::v4i32; 2530 ExpectedConversionType = MVT::v2i32; 2531 break; 2532 case ISD::FP_ROUND: 2533 ResVT = MVT::v4f32; 2534 ExpectedConversionType = MVT::v2f32; 2535 break; 2536 default: 2537 return SDValue(); 2538 } 2539 2540 if (N->getValueType(0) != ResVT) 2541 return SDValue(); 2542 2543 if (Conversion.getValueType() != ExpectedConversionType) 2544 return SDValue(); 2545 2546 auto Source = Conversion.getOperand(0); 2547 if (Source.getValueType() != MVT::v2f64) 2548 return SDValue(); 2549 2550 if (!IsZeroSplat(N->getOperand(1)) || 2551 N->getOperand(1).getValueType() != ExpectedConversionType) 2552 return SDValue(); 2553 2554 unsigned Op = GetWasmConversionOp(ConversionOp); 2555 return DAG.getNode(Op, SDLoc(N), ResVT, Source); 2556 } 2557 2558 // Combine this: 2559 // 2560 // (fp_to_{s,u}int_sat (concat_vectors $x, (v2f64 (splat 0))), 32) 2561 // 2562 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x). 2563 // 2564 // Or this: 2565 // 2566 // (v4f32 (fp_round (concat_vectors $x, (v2f64 (splat 0))))) 2567 // 2568 // into (f32x4.demote_zero_f64x2 $x). 
2569 EVT ResVT; 2570 auto ConversionOp = N->getOpcode(); 2571 switch (ConversionOp) { 2572 case ISD::FP_TO_SINT_SAT: 2573 case ISD::FP_TO_UINT_SAT: 2574 ResVT = MVT::v4i32; 2575 break; 2576 case ISD::FP_ROUND: 2577 ResVT = MVT::v4f32; 2578 break; 2579 default: 2580 llvm_unreachable("unexpected op"); 2581 } 2582 2583 if (N->getValueType(0) != ResVT) 2584 return SDValue(); 2585 2586 auto Concat = N->getOperand(0); 2587 if (Concat.getValueType() != MVT::v4f64) 2588 return SDValue(); 2589 2590 auto Source = Concat.getOperand(0); 2591 if (Source.getValueType() != MVT::v2f64) 2592 return SDValue(); 2593 2594 if (!IsZeroSplat(Concat.getOperand(1)) || 2595 Concat.getOperand(1).getValueType() != MVT::v2f64) 2596 return SDValue(); 2597 2598 unsigned Op = GetWasmConversionOp(ConversionOp); 2599 return DAG.getNode(Op, SDLoc(N), ResVT, Source); 2600 } 2601 2602 SDValue 2603 WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N, 2604 DAGCombinerInfo &DCI) const { 2605 switch (N->getOpcode()) { 2606 default: 2607 return SDValue(); 2608 case ISD::VECTOR_SHUFFLE: 2609 return performVECTOR_SHUFFLECombine(N, DCI); 2610 case ISD::SIGN_EXTEND: 2611 case ISD::ZERO_EXTEND: 2612 return performVectorExtendCombine(N, DCI); 2613 case ISD::FP_TO_SINT_SAT: 2614 case ISD::FP_TO_UINT_SAT: 2615 case ISD::FP_ROUND: 2616 case ISD::CONCAT_VECTORS: 2617 return performVectorTruncZeroCombine(N, DCI); 2618 } 2619 } 2620