1 //===- WebAssemblyTargetMachine.cpp - Define TargetMachine for WebAssembly -==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file defines the WebAssembly-specific subclass of TargetMachine. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "WebAssemblyTargetMachine.h" 15 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" 16 #include "TargetInfo/WebAssemblyTargetInfo.h" 17 #include "WebAssembly.h" 18 #include "WebAssemblyMachineFunctionInfo.h" 19 #include "WebAssemblyTargetObjectFile.h" 20 #include "WebAssemblyTargetTransformInfo.h" 21 #include "llvm/CodeGen/MIRParser/MIParser.h" 22 #include "llvm/CodeGen/MachineFunctionPass.h" 23 #include "llvm/CodeGen/Passes.h" 24 #include "llvm/CodeGen/RegAllocRegistry.h" 25 #include "llvm/CodeGen/TargetPassConfig.h" 26 #include "llvm/IR/Function.h" 27 #include "llvm/Support/TargetRegistry.h" 28 #include "llvm/Target/TargetOptions.h" 29 #include "llvm/Transforms/Scalar.h" 30 #include "llvm/Transforms/Scalar/LowerAtomic.h" 31 #include "llvm/Transforms/Utils.h" 32 using namespace llvm; 33 34 #define DEBUG_TYPE "wasm" 35 36 // Emscripten's asm.js-style exception handling 37 static cl::opt<bool> EnableEmException( 38 "enable-emscripten-cxx-exceptions", 39 cl::desc("WebAssembly Emscripten-style exception handling"), 40 cl::init(false)); 41 42 // Emscripten's asm.js-style setjmp/longjmp handling 43 static cl::opt<bool> EnableEmSjLj( 44 "enable-emscripten-sjlj", 45 cl::desc("WebAssembly Emscripten-style setjmp/longjmp handling"), 46 cl::init(false)); 47 48 extern "C" void LLVMInitializeWebAssemblyTarget() { 49 // Register the target. 50 RegisterTargetMachine<WebAssemblyTargetMachine> X( 51 getTheWebAssemblyTarget32()); 52 RegisterTargetMachine<WebAssemblyTargetMachine> Y( 53 getTheWebAssemblyTarget64()); 54 55 // Register backend passes 56 auto &PR = *PassRegistry::getPassRegistry(); 57 initializeWebAssemblyAddMissingPrototypesPass(PR); 58 initializeWebAssemblyLowerEmscriptenEHSjLjPass(PR); 59 initializeLowerGlobalDtorsPass(PR); 60 initializeFixFunctionBitcastsPass(PR); 61 initializeOptimizeReturnedPass(PR); 62 initializeWebAssemblyArgumentMovePass(PR); 63 initializeWebAssemblySetP2AlignOperandsPass(PR); 64 initializeWebAssemblyReplacePhysRegsPass(PR); 65 initializeWebAssemblyPrepareForLiveIntervalsPass(PR); 66 initializeWebAssemblyOptimizeLiveIntervalsPass(PR); 67 initializeWebAssemblyMemIntrinsicResultsPass(PR); 68 initializeWebAssemblyRegStackifyPass(PR); 69 initializeWebAssemblyRegColoringPass(PR); 70 initializeWebAssemblyFixIrreducibleControlFlowPass(PR); 71 initializeWebAssemblyLateEHPreparePass(PR); 72 initializeWebAssemblyExceptionInfoPass(PR); 73 initializeWebAssemblyCFGSortPass(PR); 74 initializeWebAssemblyCFGStackifyPass(PR); 75 initializeWebAssemblyExplicitLocalsPass(PR); 76 initializeWebAssemblyLowerBrUnlessPass(PR); 77 initializeWebAssemblyRegNumberingPass(PR); 78 initializeWebAssemblyPeepholePass(PR); 79 initializeWebAssemblyCallIndirectFixupPass(PR); 80 } 81 82 //===----------------------------------------------------------------------===// 83 // WebAssembly Lowering public interface. 84 //===----------------------------------------------------------------------===// 85 86 static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) { 87 if (!RM.hasValue()) { 88 // Default to static relocation model. This should always be more optimial 89 // than PIC since the static linker can determine all global addresses and 90 // assume direct function calls. 91 return Reloc::Static; 92 } 93 return *RM; 94 } 95 96 /// Create an WebAssembly architecture model. 97 /// 98 WebAssemblyTargetMachine::WebAssemblyTargetMachine( 99 const Target &T, const Triple &TT, StringRef CPU, StringRef FS, 100 const TargetOptions &Options, Optional<Reloc::Model> RM, 101 Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT) 102 : LLVMTargetMachine(T, 103 TT.isArch64Bit() ? "e-m:e-p:64:64-i64:64-n32:64-S128" 104 : "e-m:e-p:32:32-i64:64-n32:64-S128", 105 TT, CPU, FS, Options, getEffectiveRelocModel(RM), 106 getEffectiveCodeModel(CM, CodeModel::Large), OL), 107 TLOF(new WebAssemblyTargetObjectFile()) { 108 // WebAssembly type-checks instructions, but a noreturn function with a return 109 // type that doesn't match the context will cause a check failure. So we lower 110 // LLVM 'unreachable' to ISD::TRAP and then lower that to WebAssembly's 111 // 'unreachable' instructions which is meant for that case. 112 this->Options.TrapUnreachable = true; 113 114 // WebAssembly treats each function as an independent unit. Force 115 // -ffunction-sections, effectively, so that we can emit them independently. 116 this->Options.FunctionSections = true; 117 this->Options.DataSections = true; 118 this->Options.UniqueSectionNames = true; 119 120 initAsmInfo(); 121 122 // Note that we don't use setRequiresStructuredCFG(true). It disables 123 // optimizations than we're ok with, and want, such as critical edge 124 // splitting and tail merging. 125 } 126 127 WebAssemblyTargetMachine::~WebAssemblyTargetMachine() = default; // anchor. 128 129 const WebAssemblySubtarget * 130 WebAssemblyTargetMachine::getSubtargetImpl(std::string CPU, 131 std::string FS) const { 132 auto &I = SubtargetMap[CPU + FS]; 133 if (!I) { 134 I = llvm::make_unique<WebAssemblySubtarget>(TargetTriple, CPU, FS, *this); 135 } 136 return I.get(); 137 } 138 139 const WebAssemblySubtarget * 140 WebAssemblyTargetMachine::getSubtargetImpl(const Function &F) const { 141 Attribute CPUAttr = F.getFnAttribute("target-cpu"); 142 Attribute FSAttr = F.getFnAttribute("target-features"); 143 144 std::string CPU = !CPUAttr.hasAttribute(Attribute::None) 145 ? CPUAttr.getValueAsString().str() 146 : TargetCPU; 147 std::string FS = !FSAttr.hasAttribute(Attribute::None) 148 ? FSAttr.getValueAsString().str() 149 : TargetFS; 150 151 // This needs to be done before we create a new subtarget since any 152 // creation will depend on the TM and the code generation flags on the 153 // function that reside in TargetOptions. 154 resetTargetOptions(F); 155 156 return getSubtargetImpl(CPU, FS); 157 } 158 159 namespace { 160 161 class CoalesceFeaturesAndStripAtomics final : public ModulePass { 162 // Take the union of all features used in the module and use it for each 163 // function individually, since having multiple feature sets in one module 164 // currently does not make sense for WebAssembly. If atomics are not enabled, 165 // also strip atomic operations and thread local storage. 166 static char ID; 167 WebAssemblyTargetMachine *WasmTM; 168 169 public: 170 CoalesceFeaturesAndStripAtomics(WebAssemblyTargetMachine *WasmTM) 171 : ModulePass(ID), WasmTM(WasmTM) {} 172 173 bool runOnModule(Module &M) override { 174 FeatureBitset Features = coalesceFeatures(M); 175 176 std::string FeatureStr = getFeatureString(Features); 177 for (auto &F : M) 178 replaceFeatures(F, FeatureStr); 179 180 bool Stripped = false; 181 if (!Features[WebAssembly::FeatureAtomics]) { 182 Stripped |= stripAtomics(M); 183 Stripped |= stripThreadLocals(M); 184 } 185 186 recordFeatures(M, Features, Stripped); 187 188 // Conservatively assume we have made some change 189 return true; 190 } 191 192 private: 193 FeatureBitset coalesceFeatures(const Module &M) { 194 FeatureBitset Features = 195 WasmTM 196 ->getSubtargetImpl(WasmTM->getTargetCPU(), 197 WasmTM->getTargetFeatureString()) 198 ->getFeatureBits(); 199 for (auto &F : M) 200 Features |= WasmTM->getSubtargetImpl(F)->getFeatureBits(); 201 return Features; 202 } 203 204 std::string getFeatureString(const FeatureBitset &Features) { 205 std::string Ret; 206 for (const SubtargetFeatureKV &KV : WebAssemblyFeatureKV) { 207 if (Features[KV.Value]) 208 Ret += (StringRef("+") + KV.Key + ",").str(); 209 } 210 return Ret; 211 } 212 213 void replaceFeatures(Function &F, const std::string &Features) { 214 F.removeFnAttr("target-features"); 215 F.removeFnAttr("target-cpu"); 216 F.addFnAttr("target-features", Features); 217 } 218 219 bool stripAtomics(Module &M) { 220 // Detect whether any atomics will be lowered, since there is no way to tell 221 // whether the LowerAtomic pass lowers e.g. stores. 222 bool Stripped = false; 223 for (auto &F : M) { 224 for (auto &B : F) { 225 for (auto &I : B) { 226 if (I.isAtomic()) { 227 Stripped = true; 228 goto done; 229 } 230 } 231 } 232 } 233 234 done: 235 if (!Stripped) 236 return false; 237 238 LowerAtomicPass Lowerer; 239 FunctionAnalysisManager FAM; 240 for (auto &F : M) 241 Lowerer.run(F, FAM); 242 243 return true; 244 } 245 246 bool stripThreadLocals(Module &M) { 247 bool Stripped = false; 248 for (auto &GV : M.globals()) { 249 if (GV.getThreadLocalMode() != 250 GlobalValue::ThreadLocalMode::NotThreadLocal) { 251 Stripped = true; 252 GV.setThreadLocalMode(GlobalValue::ThreadLocalMode::NotThreadLocal); 253 } 254 } 255 return Stripped; 256 } 257 258 void recordFeatures(Module &M, const FeatureBitset &Features, bool Stripped) { 259 for (const SubtargetFeatureKV &KV : WebAssemblyFeatureKV) { 260 std::string MDKey = (StringRef("wasm-feature-") + KV.Key).str(); 261 if (KV.Value == WebAssembly::FeatureAtomics && Stripped) { 262 // "atomics" is special: code compiled without atomics may have had its 263 // atomics lowered to nonatomic operations. In that case, atomics is 264 // disallowed to prevent unsafe linking with atomics-enabled objects. 265 assert(!Features[WebAssembly::FeatureAtomics]); 266 M.addModuleFlag(Module::ModFlagBehavior::Error, MDKey, 267 wasm::WASM_FEATURE_PREFIX_DISALLOWED); 268 } else if (Features[KV.Value]) { 269 // Otherwise features are marked Used or not mentioned 270 M.addModuleFlag(Module::ModFlagBehavior::Error, MDKey, 271 wasm::WASM_FEATURE_PREFIX_USED); 272 } 273 } 274 } 275 }; 276 char CoalesceFeaturesAndStripAtomics::ID = 0; 277 278 /// WebAssembly Code Generator Pass Configuration Options. 279 class WebAssemblyPassConfig final : public TargetPassConfig { 280 public: 281 WebAssemblyPassConfig(WebAssemblyTargetMachine &TM, PassManagerBase &PM) 282 : TargetPassConfig(TM, PM) {} 283 284 WebAssemblyTargetMachine &getWebAssemblyTargetMachine() const { 285 return getTM<WebAssemblyTargetMachine>(); 286 } 287 288 FunctionPass *createTargetRegisterAllocator(bool) override; 289 290 void addIRPasses() override; 291 bool addInstSelector() override; 292 void addPostRegAlloc() override; 293 bool addGCPasses() override { return false; } 294 void addPreEmitPass() override; 295 296 // No reg alloc 297 bool addRegAssignmentFast() override { return false; } 298 299 // No reg alloc 300 bool addRegAssignmentOptimized() override { return false; } 301 }; 302 } // end anonymous namespace 303 304 TargetTransformInfo 305 WebAssemblyTargetMachine::getTargetTransformInfo(const Function &F) { 306 return TargetTransformInfo(WebAssemblyTTIImpl(this, F)); 307 } 308 309 TargetPassConfig * 310 WebAssemblyTargetMachine::createPassConfig(PassManagerBase &PM) { 311 return new WebAssemblyPassConfig(*this, PM); 312 } 313 314 FunctionPass *WebAssemblyPassConfig::createTargetRegisterAllocator(bool) { 315 return nullptr; // No reg alloc 316 } 317 318 //===----------------------------------------------------------------------===// 319 // The following functions are called from lib/CodeGen/Passes.cpp to modify 320 // the CodeGen pass sequence. 321 //===----------------------------------------------------------------------===// 322 323 void WebAssemblyPassConfig::addIRPasses() { 324 // Runs LowerAtomicPass if necessary 325 addPass(new CoalesceFeaturesAndStripAtomics(&getWebAssemblyTargetMachine())); 326 327 // This is a no-op if atomics are not used in the module 328 addPass(createAtomicExpandPass()); 329 330 // Add signatures to prototype-less function declarations 331 addPass(createWebAssemblyAddMissingPrototypes()); 332 333 // Lower .llvm.global_dtors into .llvm_global_ctors with __cxa_atexit calls. 334 addPass(createWebAssemblyLowerGlobalDtors()); 335 336 // Fix function bitcasts, as WebAssembly requires caller and callee signatures 337 // to match. 338 addPass(createWebAssemblyFixFunctionBitcasts()); 339 340 // Optimize "returned" function attributes. 341 if (getOptLevel() != CodeGenOpt::None) 342 addPass(createWebAssemblyOptimizeReturned()); 343 344 // If exception handling is not enabled and setjmp/longjmp handling is 345 // enabled, we lower invokes into calls and delete unreachable landingpad 346 // blocks. Lowering invokes when there is no EH support is done in 347 // TargetPassConfig::addPassesToHandleExceptions, but this runs after this 348 // function and SjLj handling expects all invokes to be lowered before. 349 if (!EnableEmException && 350 TM->Options.ExceptionModel == ExceptionHandling::None) { 351 addPass(createLowerInvokePass()); 352 // The lower invoke pass may create unreachable code. Remove it in order not 353 // to process dead blocks in setjmp/longjmp handling. 354 addPass(createUnreachableBlockEliminationPass()); 355 } 356 357 // Handle exceptions and setjmp/longjmp if enabled. 358 if (EnableEmException || EnableEmSjLj) 359 addPass(createWebAssemblyLowerEmscriptenEHSjLj(EnableEmException, 360 EnableEmSjLj)); 361 362 TargetPassConfig::addIRPasses(); 363 } 364 365 bool WebAssemblyPassConfig::addInstSelector() { 366 (void)TargetPassConfig::addInstSelector(); 367 addPass( 368 createWebAssemblyISelDag(getWebAssemblyTargetMachine(), getOptLevel())); 369 // Run the argument-move pass immediately after the ScheduleDAG scheduler 370 // so that we can fix up the ARGUMENT instructions before anything else 371 // sees them in the wrong place. 372 addPass(createWebAssemblyArgumentMove()); 373 // Set the p2align operands. This information is present during ISel, however 374 // it's inconvenient to collect. Collect it now, and update the immediate 375 // operands. 376 addPass(createWebAssemblySetP2AlignOperands()); 377 return false; 378 } 379 380 void WebAssemblyPassConfig::addPostRegAlloc() { 381 // TODO: The following CodeGen passes don't currently support code containing 382 // virtual registers. Consider removing their restrictions and re-enabling 383 // them. 384 385 // These functions all require the NoVRegs property. 386 disablePass(&MachineCopyPropagationID); 387 disablePass(&PostRAMachineSinkingID); 388 disablePass(&PostRASchedulerID); 389 disablePass(&FuncletLayoutID); 390 disablePass(&StackMapLivenessID); 391 disablePass(&LiveDebugValuesID); 392 disablePass(&PatchableFunctionID); 393 disablePass(&ShrinkWrapID); 394 395 // This pass hurts code size for wasm because it can generate irreducible 396 // control flow. 397 disablePass(&MachineBlockPlacementID); 398 399 TargetPassConfig::addPostRegAlloc(); 400 } 401 402 void WebAssemblyPassConfig::addPreEmitPass() { 403 TargetPassConfig::addPreEmitPass(); 404 405 // Rewrite pseudo call_indirect instructions as real instructions. 406 // This needs to run before register stackification, because we change the 407 // order of the arguments. 408 addPass(createWebAssemblyCallIndirectFixup()); 409 410 // Eliminate multiple-entry loops. 411 addPass(createWebAssemblyFixIrreducibleControlFlow()); 412 413 // Do various transformations for exception handling. 414 // Every CFG-changing optimizations should come before this. 415 addPass(createWebAssemblyLateEHPrepare()); 416 417 // Now that we have a prologue and epilogue and all frame indices are 418 // rewritten, eliminate SP and FP. This allows them to be stackified, 419 // colored, and numbered with the rest of the registers. 420 addPass(createWebAssemblyReplacePhysRegs()); 421 422 // Preparations and optimizations related to register stackification. 423 if (getOptLevel() != CodeGenOpt::None) { 424 // LiveIntervals isn't commonly run this late. Re-establish preconditions. 425 addPass(createWebAssemblyPrepareForLiveIntervals()); 426 427 // Depend on LiveIntervals and perform some optimizations on it. 428 addPass(createWebAssemblyOptimizeLiveIntervals()); 429 430 // Prepare memory intrinsic calls for register stackifying. 431 addPass(createWebAssemblyMemIntrinsicResults()); 432 433 // Mark registers as representing wasm's value stack. This is a key 434 // code-compression technique in WebAssembly. We run this pass (and 435 // MemIntrinsicResults above) very late, so that it sees as much code as 436 // possible, including code emitted by PEI and expanded by late tail 437 // duplication. 438 addPass(createWebAssemblyRegStackify()); 439 440 // Run the register coloring pass to reduce the total number of registers. 441 // This runs after stackification so that it doesn't consider registers 442 // that become stackified. 443 addPass(createWebAssemblyRegColoring()); 444 } 445 446 // Sort the blocks of the CFG into topological order, a prerequisite for 447 // BLOCK and LOOP markers. 448 addPass(createWebAssemblyCFGSort()); 449 450 // Insert BLOCK and LOOP markers. 451 addPass(createWebAssemblyCFGStackify()); 452 453 // Insert explicit local.get and local.set operators. 454 addPass(createWebAssemblyExplicitLocals()); 455 456 // Lower br_unless into br_if. 457 addPass(createWebAssemblyLowerBrUnless()); 458 459 // Perform the very last peephole optimizations on the code. 460 if (getOptLevel() != CodeGenOpt::None) 461 addPass(createWebAssemblyPeephole()); 462 463 // Create a mapping from LLVM CodeGen virtual registers to wasm registers. 464 addPass(createWebAssemblyRegNumbering()); 465 } 466 467 yaml::MachineFunctionInfo * 468 WebAssemblyTargetMachine::createDefaultFuncInfoYAML() const { 469 return new yaml::WebAssemblyFunctionInfo(); 470 } 471 472 yaml::MachineFunctionInfo *WebAssemblyTargetMachine::convertFuncInfoToYAML( 473 const MachineFunction &MF) const { 474 const auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>(); 475 return new yaml::WebAssemblyFunctionInfo(*MFI); 476 } 477 478 bool WebAssemblyTargetMachine::parseMachineFunctionInfo( 479 const yaml::MachineFunctionInfo &MFI, PerFunctionMIParsingState &PFS, 480 SMDiagnostic &Error, SMRange &SourceRange) const { 481 const auto &YamlMFI = 482 reinterpret_cast<const yaml::WebAssemblyFunctionInfo &>(MFI); 483 MachineFunction &MF = PFS.MF; 484 MF.getInfo<WebAssemblyFunctionInfo>()->initializeBaseYamlFields(YamlMFI); 485 return false; 486 } 487