1 //===- WebAssemblyTargetMachine.cpp - Define TargetMachine for WebAssembly -==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file defines the WebAssembly-specific subclass of TargetMachine.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "WebAssemblyTargetMachine.h"
15 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
16 #include "TargetInfo/WebAssemblyTargetInfo.h"
17 #include "WebAssembly.h"
18 #include "WebAssemblyMachineFunctionInfo.h"
19 #include "WebAssemblyTargetObjectFile.h"
20 #include "WebAssemblyTargetTransformInfo.h"
21 #include "llvm/CodeGen/MIRParser/MIParser.h"
22 #include "llvm/CodeGen/MachineFunctionPass.h"
23 #include "llvm/CodeGen/Passes.h"
24 #include "llvm/CodeGen/RegAllocRegistry.h"
25 #include "llvm/CodeGen/TargetPassConfig.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/Support/TargetRegistry.h"
28 #include "llvm/Target/TargetOptions.h"
29 #include "llvm/Transforms/Scalar.h"
30 #include "llvm/Transforms/Scalar/LowerAtomic.h"
31 #include "llvm/Transforms/Utils.h"
32 using namespace llvm;
33 
34 #define DEBUG_TYPE "wasm"
35 
36 // Emscripten's asm.js-style exception handling
37 static cl::opt<bool> EnableEmException(
38     "enable-emscripten-cxx-exceptions",
39     cl::desc("WebAssembly Emscripten-style exception handling"),
40     cl::init(false));
41 
42 // Emscripten's asm.js-style setjmp/longjmp handling
43 static cl::opt<bool> EnableEmSjLj(
44     "enable-emscripten-sjlj",
45     cl::desc("WebAssembly Emscripten-style setjmp/longjmp handling"),
46     cl::init(false));
47 
48 extern "C" void LLVMInitializeWebAssemblyTarget() {
49   // Register the target.
50   RegisterTargetMachine<WebAssemblyTargetMachine> X(
51       getTheWebAssemblyTarget32());
52   RegisterTargetMachine<WebAssemblyTargetMachine> Y(
53       getTheWebAssemblyTarget64());
54 
55   // Register backend passes
56   auto &PR = *PassRegistry::getPassRegistry();
57   initializeWebAssemblyAddMissingPrototypesPass(PR);
58   initializeWebAssemblyLowerEmscriptenEHSjLjPass(PR);
59   initializeLowerGlobalDtorsPass(PR);
60   initializeFixFunctionBitcastsPass(PR);
61   initializeOptimizeReturnedPass(PR);
62   initializeWebAssemblyArgumentMovePass(PR);
63   initializeWebAssemblySetP2AlignOperandsPass(PR);
64   initializeWebAssemblyReplacePhysRegsPass(PR);
65   initializeWebAssemblyPrepareForLiveIntervalsPass(PR);
66   initializeWebAssemblyOptimizeLiveIntervalsPass(PR);
67   initializeWebAssemblyMemIntrinsicResultsPass(PR);
68   initializeWebAssemblyRegStackifyPass(PR);
69   initializeWebAssemblyRegColoringPass(PR);
70   initializeWebAssemblyFixIrreducibleControlFlowPass(PR);
71   initializeWebAssemblyLateEHPreparePass(PR);
72   initializeWebAssemblyExceptionInfoPass(PR);
73   initializeWebAssemblyCFGSortPass(PR);
74   initializeWebAssemblyCFGStackifyPass(PR);
75   initializeWebAssemblyExplicitLocalsPass(PR);
76   initializeWebAssemblyLowerBrUnlessPass(PR);
77   initializeWebAssemblyRegNumberingPass(PR);
78   initializeWebAssemblyPeepholePass(PR);
79   initializeWebAssemblyCallIndirectFixupPass(PR);
80 }
81 
82 //===----------------------------------------------------------------------===//
83 // WebAssembly Lowering public interface.
84 //===----------------------------------------------------------------------===//
85 
86 static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
87   if (!RM.hasValue()) {
88     // Default to static relocation model.  This should always be more optimial
89     // than PIC since the static linker can determine all global addresses and
90     // assume direct function calls.
91     return Reloc::Static;
92   }
93   return *RM;
94 }
95 
96 /// Create an WebAssembly architecture model.
97 ///
98 WebAssemblyTargetMachine::WebAssemblyTargetMachine(
99     const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
100     const TargetOptions &Options, Optional<Reloc::Model> RM,
101     Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT)
102     : LLVMTargetMachine(T,
103                         TT.isArch64Bit() ? "e-m:e-p:64:64-i64:64-n32:64-S128"
104                                          : "e-m:e-p:32:32-i64:64-n32:64-S128",
105                         TT, CPU, FS, Options, getEffectiveRelocModel(RM),
106                         getEffectiveCodeModel(CM, CodeModel::Large), OL),
107       TLOF(new WebAssemblyTargetObjectFile()) {
108   // WebAssembly type-checks instructions, but a noreturn function with a return
109   // type that doesn't match the context will cause a check failure. So we lower
110   // LLVM 'unreachable' to ISD::TRAP and then lower that to WebAssembly's
111   // 'unreachable' instructions which is meant for that case.
112   this->Options.TrapUnreachable = true;
113 
114   // WebAssembly treats each function as an independent unit. Force
115   // -ffunction-sections, effectively, so that we can emit them independently.
116   this->Options.FunctionSections = true;
117   this->Options.DataSections = true;
118   this->Options.UniqueSectionNames = true;
119 
120   initAsmInfo();
121 
122   // Note that we don't use setRequiresStructuredCFG(true). It disables
123   // optimizations than we're ok with, and want, such as critical edge
124   // splitting and tail merging.
125 }
126 
127 WebAssemblyTargetMachine::~WebAssemblyTargetMachine() = default; // anchor.
128 
129 const WebAssemblySubtarget *
130 WebAssemblyTargetMachine::getSubtargetImpl(std::string CPU,
131                                            std::string FS) const {
132   auto &I = SubtargetMap[CPU + FS];
133   if (!I) {
134     I = llvm::make_unique<WebAssemblySubtarget>(TargetTriple, CPU, FS, *this);
135   }
136   return I.get();
137 }
138 
139 const WebAssemblySubtarget *
140 WebAssemblyTargetMachine::getSubtargetImpl(const Function &F) const {
141   Attribute CPUAttr = F.getFnAttribute("target-cpu");
142   Attribute FSAttr = F.getFnAttribute("target-features");
143 
144   std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
145                         ? CPUAttr.getValueAsString().str()
146                         : TargetCPU;
147   std::string FS = !FSAttr.hasAttribute(Attribute::None)
148                        ? FSAttr.getValueAsString().str()
149                        : TargetFS;
150 
151   // This needs to be done before we create a new subtarget since any
152   // creation will depend on the TM and the code generation flags on the
153   // function that reside in TargetOptions.
154   resetTargetOptions(F);
155 
156   return getSubtargetImpl(CPU, FS);
157 }
158 
159 namespace {
160 
161 class CoalesceFeaturesAndStripAtomics final : public ModulePass {
162   // Take the union of all features used in the module and use it for each
163   // function individually, since having multiple feature sets in one module
164   // currently does not make sense for WebAssembly. If atomics are not enabled,
165   // also strip atomic operations and thread local storage.
166   static char ID;
167   WebAssemblyTargetMachine *WasmTM;
168 
169 public:
170   CoalesceFeaturesAndStripAtomics(WebAssemblyTargetMachine *WasmTM)
171       : ModulePass(ID), WasmTM(WasmTM) {}
172 
173   bool runOnModule(Module &M) override {
174     FeatureBitset Features = coalesceFeatures(M);
175 
176     std::string FeatureStr = getFeatureString(Features);
177     for (auto &F : M)
178       replaceFeatures(F, FeatureStr);
179 
180     bool Stripped = false;
181     if (!Features[WebAssembly::FeatureAtomics]) {
182       Stripped |= stripAtomics(M);
183       Stripped |= stripThreadLocals(M);
184     }
185 
186     recordFeatures(M, Features, Stripped);
187 
188     // Conservatively assume we have made some change
189     return true;
190   }
191 
192 private:
193   FeatureBitset coalesceFeatures(const Module &M) {
194     FeatureBitset Features =
195         WasmTM
196             ->getSubtargetImpl(WasmTM->getTargetCPU(),
197                                WasmTM->getTargetFeatureString())
198             ->getFeatureBits();
199     for (auto &F : M)
200       Features |= WasmTM->getSubtargetImpl(F)->getFeatureBits();
201     return Features;
202   }
203 
204   std::string getFeatureString(const FeatureBitset &Features) {
205     std::string Ret;
206     for (const SubtargetFeatureKV &KV : WebAssemblyFeatureKV) {
207       if (Features[KV.Value])
208         Ret += (StringRef("+") + KV.Key + ",").str();
209     }
210     return Ret;
211   }
212 
213   void replaceFeatures(Function &F, const std::string &Features) {
214     F.removeFnAttr("target-features");
215     F.removeFnAttr("target-cpu");
216     F.addFnAttr("target-features", Features);
217   }
218 
219   bool stripAtomics(Module &M) {
220     // Detect whether any atomics will be lowered, since there is no way to tell
221     // whether the LowerAtomic pass lowers e.g. stores.
222     bool Stripped = false;
223     for (auto &F : M) {
224       for (auto &B : F) {
225         for (auto &I : B) {
226           if (I.isAtomic()) {
227             Stripped = true;
228             goto done;
229           }
230         }
231       }
232     }
233 
234   done:
235     if (!Stripped)
236       return false;
237 
238     LowerAtomicPass Lowerer;
239     FunctionAnalysisManager FAM;
240     for (auto &F : M)
241       Lowerer.run(F, FAM);
242 
243     return true;
244   }
245 
246   bool stripThreadLocals(Module &M) {
247     bool Stripped = false;
248     for (auto &GV : M.globals()) {
249       if (GV.getThreadLocalMode() !=
250           GlobalValue::ThreadLocalMode::NotThreadLocal) {
251         Stripped = true;
252         GV.setThreadLocalMode(GlobalValue::ThreadLocalMode::NotThreadLocal);
253       }
254     }
255     return Stripped;
256   }
257 
258   void recordFeatures(Module &M, const FeatureBitset &Features, bool Stripped) {
259     for (const SubtargetFeatureKV &KV : WebAssemblyFeatureKV) {
260       std::string MDKey = (StringRef("wasm-feature-") + KV.Key).str();
261       if (KV.Value == WebAssembly::FeatureAtomics && Stripped) {
262         // "atomics" is special: code compiled without atomics may have had its
263         // atomics lowered to nonatomic operations. In that case, atomics is
264         // disallowed to prevent unsafe linking with atomics-enabled objects.
265         assert(!Features[WebAssembly::FeatureAtomics]);
266         M.addModuleFlag(Module::ModFlagBehavior::Error, MDKey,
267                         wasm::WASM_FEATURE_PREFIX_DISALLOWED);
268       } else if (Features[KV.Value]) {
269         // Otherwise features are marked Used or not mentioned
270         M.addModuleFlag(Module::ModFlagBehavior::Error, MDKey,
271                         wasm::WASM_FEATURE_PREFIX_USED);
272       }
273     }
274   }
275 };
276 char CoalesceFeaturesAndStripAtomics::ID = 0;
277 
278 /// WebAssembly Code Generator Pass Configuration Options.
279 class WebAssemblyPassConfig final : public TargetPassConfig {
280 public:
281   WebAssemblyPassConfig(WebAssemblyTargetMachine &TM, PassManagerBase &PM)
282       : TargetPassConfig(TM, PM) {}
283 
284   WebAssemblyTargetMachine &getWebAssemblyTargetMachine() const {
285     return getTM<WebAssemblyTargetMachine>();
286   }
287 
288   FunctionPass *createTargetRegisterAllocator(bool) override;
289 
290   void addIRPasses() override;
291   bool addInstSelector() override;
292   void addPostRegAlloc() override;
293   bool addGCPasses() override { return false; }
294   void addPreEmitPass() override;
295 
296   // No reg alloc
297   bool addRegAssignmentFast() override { return false; }
298 
299   // No reg alloc
300   bool addRegAssignmentOptimized() override { return false; }
301 };
302 } // end anonymous namespace
303 
304 TargetTransformInfo
305 WebAssemblyTargetMachine::getTargetTransformInfo(const Function &F) {
306   return TargetTransformInfo(WebAssemblyTTIImpl(this, F));
307 }
308 
309 TargetPassConfig *
310 WebAssemblyTargetMachine::createPassConfig(PassManagerBase &PM) {
311   return new WebAssemblyPassConfig(*this, PM);
312 }
313 
314 FunctionPass *WebAssemblyPassConfig::createTargetRegisterAllocator(bool) {
315   return nullptr; // No reg alloc
316 }
317 
318 //===----------------------------------------------------------------------===//
319 // The following functions are called from lib/CodeGen/Passes.cpp to modify
320 // the CodeGen pass sequence.
321 //===----------------------------------------------------------------------===//
322 
323 void WebAssemblyPassConfig::addIRPasses() {
324   // Runs LowerAtomicPass if necessary
325   addPass(new CoalesceFeaturesAndStripAtomics(&getWebAssemblyTargetMachine()));
326 
327   // This is a no-op if atomics are not used in the module
328   addPass(createAtomicExpandPass());
329 
330   // Add signatures to prototype-less function declarations
331   addPass(createWebAssemblyAddMissingPrototypes());
332 
333   // Lower .llvm.global_dtors into .llvm_global_ctors with __cxa_atexit calls.
334   addPass(createWebAssemblyLowerGlobalDtors());
335 
336   // Fix function bitcasts, as WebAssembly requires caller and callee signatures
337   // to match.
338   addPass(createWebAssemblyFixFunctionBitcasts());
339 
340   // Optimize "returned" function attributes.
341   if (getOptLevel() != CodeGenOpt::None)
342     addPass(createWebAssemblyOptimizeReturned());
343 
344   // If exception handling is not enabled and setjmp/longjmp handling is
345   // enabled, we lower invokes into calls and delete unreachable landingpad
346   // blocks. Lowering invokes when there is no EH support is done in
347   // TargetPassConfig::addPassesToHandleExceptions, but this runs after this
348   // function and SjLj handling expects all invokes to be lowered before.
349   if (!EnableEmException &&
350       TM->Options.ExceptionModel == ExceptionHandling::None) {
351     addPass(createLowerInvokePass());
352     // The lower invoke pass may create unreachable code. Remove it in order not
353     // to process dead blocks in setjmp/longjmp handling.
354     addPass(createUnreachableBlockEliminationPass());
355   }
356 
357   // Handle exceptions and setjmp/longjmp if enabled.
358   if (EnableEmException || EnableEmSjLj)
359     addPass(createWebAssemblyLowerEmscriptenEHSjLj(EnableEmException,
360                                                    EnableEmSjLj));
361 
362   TargetPassConfig::addIRPasses();
363 }
364 
365 bool WebAssemblyPassConfig::addInstSelector() {
366   (void)TargetPassConfig::addInstSelector();
367   addPass(
368       createWebAssemblyISelDag(getWebAssemblyTargetMachine(), getOptLevel()));
369   // Run the argument-move pass immediately after the ScheduleDAG scheduler
370   // so that we can fix up the ARGUMENT instructions before anything else
371   // sees them in the wrong place.
372   addPass(createWebAssemblyArgumentMove());
373   // Set the p2align operands. This information is present during ISel, however
374   // it's inconvenient to collect. Collect it now, and update the immediate
375   // operands.
376   addPass(createWebAssemblySetP2AlignOperands());
377   return false;
378 }
379 
380 void WebAssemblyPassConfig::addPostRegAlloc() {
381   // TODO: The following CodeGen passes don't currently support code containing
382   // virtual registers. Consider removing their restrictions and re-enabling
383   // them.
384 
385   // These functions all require the NoVRegs property.
386   disablePass(&MachineCopyPropagationID);
387   disablePass(&PostRAMachineSinkingID);
388   disablePass(&PostRASchedulerID);
389   disablePass(&FuncletLayoutID);
390   disablePass(&StackMapLivenessID);
391   disablePass(&LiveDebugValuesID);
392   disablePass(&PatchableFunctionID);
393   disablePass(&ShrinkWrapID);
394 
395   // This pass hurts code size for wasm because it can generate irreducible
396   // control flow.
397   disablePass(&MachineBlockPlacementID);
398 
399   TargetPassConfig::addPostRegAlloc();
400 }
401 
402 void WebAssemblyPassConfig::addPreEmitPass() {
403   TargetPassConfig::addPreEmitPass();
404 
405   // Rewrite pseudo call_indirect instructions as real instructions.
406   // This needs to run before register stackification, because we change the
407   // order of the arguments.
408   addPass(createWebAssemblyCallIndirectFixup());
409 
410   // Eliminate multiple-entry loops.
411   addPass(createWebAssemblyFixIrreducibleControlFlow());
412 
413   // Do various transformations for exception handling.
414   // Every CFG-changing optimizations should come before this.
415   addPass(createWebAssemblyLateEHPrepare());
416 
417   // Now that we have a prologue and epilogue and all frame indices are
418   // rewritten, eliminate SP and FP. This allows them to be stackified,
419   // colored, and numbered with the rest of the registers.
420   addPass(createWebAssemblyReplacePhysRegs());
421 
422   // Preparations and optimizations related to register stackification.
423   if (getOptLevel() != CodeGenOpt::None) {
424     // LiveIntervals isn't commonly run this late. Re-establish preconditions.
425     addPass(createWebAssemblyPrepareForLiveIntervals());
426 
427     // Depend on LiveIntervals and perform some optimizations on it.
428     addPass(createWebAssemblyOptimizeLiveIntervals());
429 
430     // Prepare memory intrinsic calls for register stackifying.
431     addPass(createWebAssemblyMemIntrinsicResults());
432 
433     // Mark registers as representing wasm's value stack. This is a key
434     // code-compression technique in WebAssembly. We run this pass (and
435     // MemIntrinsicResults above) very late, so that it sees as much code as
436     // possible, including code emitted by PEI and expanded by late tail
437     // duplication.
438     addPass(createWebAssemblyRegStackify());
439 
440     // Run the register coloring pass to reduce the total number of registers.
441     // This runs after stackification so that it doesn't consider registers
442     // that become stackified.
443     addPass(createWebAssemblyRegColoring());
444   }
445 
446   // Sort the blocks of the CFG into topological order, a prerequisite for
447   // BLOCK and LOOP markers.
448   addPass(createWebAssemblyCFGSort());
449 
450   // Insert BLOCK and LOOP markers.
451   addPass(createWebAssemblyCFGStackify());
452 
453   // Insert explicit local.get and local.set operators.
454   addPass(createWebAssemblyExplicitLocals());
455 
456   // Lower br_unless into br_if.
457   addPass(createWebAssemblyLowerBrUnless());
458 
459   // Perform the very last peephole optimizations on the code.
460   if (getOptLevel() != CodeGenOpt::None)
461     addPass(createWebAssemblyPeephole());
462 
463   // Create a mapping from LLVM CodeGen virtual registers to wasm registers.
464   addPass(createWebAssemblyRegNumbering());
465 }
466 
467 yaml::MachineFunctionInfo *
468 WebAssemblyTargetMachine::createDefaultFuncInfoYAML() const {
469   return new yaml::WebAssemblyFunctionInfo();
470 }
471 
472 yaml::MachineFunctionInfo *WebAssemblyTargetMachine::convertFuncInfoToYAML(
473     const MachineFunction &MF) const {
474   const auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();
475   return new yaml::WebAssemblyFunctionInfo(*MFI);
476 }
477 
478 bool WebAssemblyTargetMachine::parseMachineFunctionInfo(
479     const yaml::MachineFunctionInfo &MFI, PerFunctionMIParsingState &PFS,
480     SMDiagnostic &Error, SMRange &SourceRange) const {
481   const auto &YamlMFI =
482       reinterpret_cast<const yaml::WebAssemblyFunctionInfo &>(MFI);
483   MachineFunction &MF = PFS.MF;
484   MF.getInfo<WebAssemblyFunctionInfo>()->initializeBaseYamlFields(YamlMFI);
485   return false;
486 }
487