//===-- WebAssemblyFixFunctionBitcasts.cpp - Fix function bitcasts --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Fix bitcasted functions.
///
/// WebAssembly requires caller and callee signatures to match; in LLVM,
/// however, some amount of slop is vaguely permitted. Detect mismatches by
/// looking for bitcasts of functions and rewrite them to use wrapper functions
/// instead.
///
/// This doesn't catch all cases, such as when a function's address is taken in
/// one place and cast in another, but it works for many common cases.
///
/// Note that LLVM already optimizes away function bitcasts in common cases by
/// dropping arguments as needed, so this pass only ends up getting used in less
/// common cases.
///
//===----------------------------------------------------------------------===//
2524e2fe98SDimitry Andric
2624e2fe98SDimitry Andric #include "WebAssembly.h"
272cab237bSDimitry Andric #include "llvm/IR/CallSite.h"
2824e2fe98SDimitry Andric #include "llvm/IR/Constants.h"
2924e2fe98SDimitry Andric #include "llvm/IR/Instructions.h"
3024e2fe98SDimitry Andric #include "llvm/IR/Module.h"
3124e2fe98SDimitry Andric #include "llvm/IR/Operator.h"
3224e2fe98SDimitry Andric #include "llvm/Pass.h"
3324e2fe98SDimitry Andric #include "llvm/Support/Debug.h"
3424e2fe98SDimitry Andric #include "llvm/Support/raw_ostream.h"
3524e2fe98SDimitry Andric using namespace llvm;
3624e2fe98SDimitry Andric
3724e2fe98SDimitry Andric #define DEBUG_TYPE "wasm-fix-function-bitcasts"
3824e2fe98SDimitry Andric
3924e2fe98SDimitry Andric namespace {
4024e2fe98SDimitry Andric class FixFunctionBitcasts final : public ModulePass {
getPassName() const4124e2fe98SDimitry Andric StringRef getPassName() const override {
4224e2fe98SDimitry Andric return "WebAssembly Fix Function Bitcasts";
4324e2fe98SDimitry Andric }
4424e2fe98SDimitry Andric
getAnalysisUsage(AnalysisUsage & AU) const4524e2fe98SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
4624e2fe98SDimitry Andric AU.setPreservesCFG();
4724e2fe98SDimitry Andric ModulePass::getAnalysisUsage(AU);
4824e2fe98SDimitry Andric }
4924e2fe98SDimitry Andric
5024e2fe98SDimitry Andric bool runOnModule(Module &M) override;
5124e2fe98SDimitry Andric
5224e2fe98SDimitry Andric public:
5324e2fe98SDimitry Andric static char ID;
FixFunctionBitcasts()5424e2fe98SDimitry Andric FixFunctionBitcasts() : ModulePass(ID) {}
5524e2fe98SDimitry Andric };
5624e2fe98SDimitry Andric } // End anonymous namespace
5724e2fe98SDimitry Andric
5824e2fe98SDimitry Andric char FixFunctionBitcasts::ID = 0;
594ba319b5SDimitry Andric INITIALIZE_PASS(FixFunctionBitcasts, DEBUG_TYPE,
604ba319b5SDimitry Andric "Fix mismatching bitcasts for WebAssembly", false, false)
614ba319b5SDimitry Andric
createWebAssemblyFixFunctionBitcasts()6224e2fe98SDimitry Andric ModulePass *llvm::createWebAssemblyFixFunctionBitcasts() {
6324e2fe98SDimitry Andric return new FixFunctionBitcasts();
6424e2fe98SDimitry Andric }
6524e2fe98SDimitry Andric
6624e2fe98SDimitry Andric // Recursively descend the def-use lists from V to find non-bitcast users of
6724e2fe98SDimitry Andric // bitcasts of V.
FindUses(Value * V,Function & F,SmallVectorImpl<std::pair<Use *,Function * >> & Uses,SmallPtrSetImpl<Constant * > & ConstantBCs)6824e2fe98SDimitry Andric static void FindUses(Value *V, Function &F,
69f1a29dd3SDimitry Andric SmallVectorImpl<std::pair<Use *, Function *>> &Uses,
70f1a29dd3SDimitry Andric SmallPtrSetImpl<Constant *> &ConstantBCs) {
7124e2fe98SDimitry Andric for (Use &U : V->uses()) {
7224e2fe98SDimitry Andric if (BitCastOperator *BC = dyn_cast<BitCastOperator>(U.getUser()))
73f1a29dd3SDimitry Andric FindUses(BC, F, Uses, ConstantBCs);
74f1a29dd3SDimitry Andric else if (U.get()->getType() != F.getType()) {
752cab237bSDimitry Andric CallSite CS(U.getUser());
762cab237bSDimitry Andric if (!CS)
772cab237bSDimitry Andric // Skip uses that aren't immediately called
782cab237bSDimitry Andric continue;
792cab237bSDimitry Andric Value *Callee = CS.getCalledValue();
802cab237bSDimitry Andric if (Callee != V)
812cab237bSDimitry Andric // Skip calls where the function isn't the callee
822cab237bSDimitry Andric continue;
83f1a29dd3SDimitry Andric if (isa<Constant>(U.get())) {
84f1a29dd3SDimitry Andric // Only add constant bitcasts to the list once; they get RAUW'd
85f1a29dd3SDimitry Andric auto c = ConstantBCs.insert(cast<Constant>(U.get()));
862cab237bSDimitry Andric if (!c.second)
872cab237bSDimitry Andric continue;
88f1a29dd3SDimitry Andric }
8924e2fe98SDimitry Andric Uses.push_back(std::make_pair(&U, &F));
9024e2fe98SDimitry Andric }
9124e2fe98SDimitry Andric }
92f1a29dd3SDimitry Andric }
9324e2fe98SDimitry Andric
9424e2fe98SDimitry Andric // Create a wrapper function with type Ty that calls F (which may have a
9524e2fe98SDimitry Andric // different type). Attempt to support common bitcasted function idioms:
9624e2fe98SDimitry Andric // - Call with more arguments than needed: arguments are dropped
9724e2fe98SDimitry Andric // - Call with fewer arguments than needed: arguments are filled in with undef
9824e2fe98SDimitry Andric // - Return value is not needed: drop it
9924e2fe98SDimitry Andric // - Return value needed but not present: supply an undef
10024e2fe98SDimitry Andric //
101*b5893f02SDimitry Andric // If the all the argument types of trivially castable to one another (i.e.
102*b5893f02SDimitry Andric // I32 vs pointer type) then we don't create a wrapper at all (return nullptr
103*b5893f02SDimitry Andric // instead).
104*b5893f02SDimitry Andric //
105*b5893f02SDimitry Andric // If there is a type mismatch that we know would result in an invalid wasm
106*b5893f02SDimitry Andric // module then generate wrapper that contains unreachable (i.e. abort at
107*b5893f02SDimitry Andric // runtime). Such programs are deep into undefined behaviour territory,
108*b5893f02SDimitry Andric // but we choose to fail at runtime rather than generate and invalid module
109*b5893f02SDimitry Andric // or fail at compiler time. The reason we delay the error is that we want
110*b5893f02SDimitry Andric // to support the CMake which expects to be able to compile and link programs
111*b5893f02SDimitry Andric // that refer to functions with entirely incorrect signatures (this is how
112*b5893f02SDimitry Andric // CMake detects the existence of a function in a toolchain).
113*b5893f02SDimitry Andric //
114*b5893f02SDimitry Andric // For bitcasts that involve struct types we don't know at this stage if they
115*b5893f02SDimitry Andric // would be equivalent at the wasm level and so we can't know if we need to
116*b5893f02SDimitry Andric // generate a wrapper.
CreateWrapper(Function * F,FunctionType * Ty)11724e2fe98SDimitry Andric static Function *CreateWrapper(Function *F, FunctionType *Ty) {
11824e2fe98SDimitry Andric Module *M = F->getParent();
11924e2fe98SDimitry Andric
120*b5893f02SDimitry Andric Function *Wrapper = Function::Create(Ty, Function::PrivateLinkage,
121*b5893f02SDimitry Andric F->getName() + "_bitcast", M);
12224e2fe98SDimitry Andric BasicBlock *BB = BasicBlock::Create(M->getContext(), "body", Wrapper);
123*b5893f02SDimitry Andric const DataLayout &DL = BB->getModule()->getDataLayout();
12424e2fe98SDimitry Andric
12524e2fe98SDimitry Andric // Determine what arguments to pass.
12624e2fe98SDimitry Andric SmallVector<Value *, 4> Args;
12724e2fe98SDimitry Andric Function::arg_iterator AI = Wrapper->arg_begin();
1282cab237bSDimitry Andric Function::arg_iterator AE = Wrapper->arg_end();
12924e2fe98SDimitry Andric FunctionType::param_iterator PI = F->getFunctionType()->param_begin();
13024e2fe98SDimitry Andric FunctionType::param_iterator PE = F->getFunctionType()->param_end();
131*b5893f02SDimitry Andric bool TypeMismatch = false;
132*b5893f02SDimitry Andric bool WrapperNeeded = false;
133*b5893f02SDimitry Andric
134*b5893f02SDimitry Andric Type *ExpectedRtnType = F->getFunctionType()->getReturnType();
135*b5893f02SDimitry Andric Type *RtnType = Ty->getReturnType();
136*b5893f02SDimitry Andric
137*b5893f02SDimitry Andric if ((F->getFunctionType()->getNumParams() != Ty->getNumParams()) ||
138*b5893f02SDimitry Andric (F->getFunctionType()->isVarArg() != Ty->isVarArg()) ||
139*b5893f02SDimitry Andric (ExpectedRtnType != RtnType))
140*b5893f02SDimitry Andric WrapperNeeded = true;
141*b5893f02SDimitry Andric
1422cab237bSDimitry Andric for (; AI != AE && PI != PE; ++AI, ++PI) {
143*b5893f02SDimitry Andric Type *ArgType = AI->getType();
144*b5893f02SDimitry Andric Type *ParamType = *PI;
145*b5893f02SDimitry Andric
146*b5893f02SDimitry Andric if (ArgType == ParamType) {
14724e2fe98SDimitry Andric Args.push_back(&*AI);
148*b5893f02SDimitry Andric } else {
149*b5893f02SDimitry Andric if (CastInst::isBitOrNoopPointerCastable(ArgType, ParamType, DL)) {
150*b5893f02SDimitry Andric Instruction *PtrCast =
151*b5893f02SDimitry Andric CastInst::CreateBitOrPointerCast(AI, ParamType, "cast");
152*b5893f02SDimitry Andric BB->getInstList().push_back(PtrCast);
153*b5893f02SDimitry Andric Args.push_back(PtrCast);
154*b5893f02SDimitry Andric } else if (ArgType->isStructTy() || ParamType->isStructTy()) {
155*b5893f02SDimitry Andric LLVM_DEBUG(dbgs() << "CreateWrapper: struct param type in bitcast: "
156*b5893f02SDimitry Andric << F->getName() << "\n");
157*b5893f02SDimitry Andric WrapperNeeded = false;
158*b5893f02SDimitry Andric } else {
159*b5893f02SDimitry Andric LLVM_DEBUG(dbgs() << "CreateWrapper: arg type mismatch calling: "
160*b5893f02SDimitry Andric << F->getName() << "\n");
161*b5893f02SDimitry Andric LLVM_DEBUG(dbgs() << "Arg[" << Args.size() << "] Expected: "
162*b5893f02SDimitry Andric << *ParamType << " Got: " << *ArgType << "\n");
163*b5893f02SDimitry Andric TypeMismatch = true;
164*b5893f02SDimitry Andric break;
16524e2fe98SDimitry Andric }
166*b5893f02SDimitry Andric }
167*b5893f02SDimitry Andric }
168*b5893f02SDimitry Andric
169*b5893f02SDimitry Andric if (WrapperNeeded && !TypeMismatch) {
17024e2fe98SDimitry Andric for (; PI != PE; ++PI)
17124e2fe98SDimitry Andric Args.push_back(UndefValue::get(*PI));
1722cab237bSDimitry Andric if (F->isVarArg())
1732cab237bSDimitry Andric for (; AI != AE; ++AI)
1742cab237bSDimitry Andric Args.push_back(&*AI);
17524e2fe98SDimitry Andric
17624e2fe98SDimitry Andric CallInst *Call = CallInst::Create(F, Args, "", BB);
17724e2fe98SDimitry Andric
178*b5893f02SDimitry Andric Type *ExpectedRtnType = F->getFunctionType()->getReturnType();
179*b5893f02SDimitry Andric Type *RtnType = Ty->getReturnType();
18024e2fe98SDimitry Andric // Determine what value to return.
181*b5893f02SDimitry Andric if (RtnType->isVoidTy()) {
18224e2fe98SDimitry Andric ReturnInst::Create(M->getContext(), BB);
183*b5893f02SDimitry Andric } else if (ExpectedRtnType->isVoidTy()) {
184*b5893f02SDimitry Andric LLVM_DEBUG(dbgs() << "Creating dummy return: " << *RtnType << "\n");
185*b5893f02SDimitry Andric ReturnInst::Create(M->getContext(), UndefValue::get(RtnType), BB);
186*b5893f02SDimitry Andric } else if (RtnType == ExpectedRtnType) {
18724e2fe98SDimitry Andric ReturnInst::Create(M->getContext(), Call, BB);
188*b5893f02SDimitry Andric } else if (CastInst::isBitOrNoopPointerCastable(ExpectedRtnType, RtnType,
189*b5893f02SDimitry Andric DL)) {
190*b5893f02SDimitry Andric Instruction *Cast =
191*b5893f02SDimitry Andric CastInst::CreateBitOrPointerCast(Call, RtnType, "cast");
192*b5893f02SDimitry Andric BB->getInstList().push_back(Cast);
193*b5893f02SDimitry Andric ReturnInst::Create(M->getContext(), Cast, BB);
194*b5893f02SDimitry Andric } else if (RtnType->isStructTy() || ExpectedRtnType->isStructTy()) {
195*b5893f02SDimitry Andric LLVM_DEBUG(dbgs() << "CreateWrapper: struct return type in bitcast: "
196*b5893f02SDimitry Andric << F->getName() << "\n");
197*b5893f02SDimitry Andric WrapperNeeded = false;
198*b5893f02SDimitry Andric } else {
199*b5893f02SDimitry Andric LLVM_DEBUG(dbgs() << "CreateWrapper: return type mismatch calling: "
200*b5893f02SDimitry Andric << F->getName() << "\n");
201*b5893f02SDimitry Andric LLVM_DEBUG(dbgs() << "Expected: " << *ExpectedRtnType
202*b5893f02SDimitry Andric << " Got: " << *RtnType << "\n");
203*b5893f02SDimitry Andric TypeMismatch = true;
204*b5893f02SDimitry Andric }
205*b5893f02SDimitry Andric }
206*b5893f02SDimitry Andric
207*b5893f02SDimitry Andric if (TypeMismatch) {
208*b5893f02SDimitry Andric // Create a new wrapper that simply contains `unreachable`.
209*b5893f02SDimitry Andric Wrapper->eraseFromParent();
210*b5893f02SDimitry Andric Wrapper = Function::Create(Ty, Function::PrivateLinkage,
211*b5893f02SDimitry Andric F->getName() + "_bitcast_invalid", M);
212*b5893f02SDimitry Andric BasicBlock *BB = BasicBlock::Create(M->getContext(), "body", Wrapper);
213*b5893f02SDimitry Andric new UnreachableInst(M->getContext(), BB);
214*b5893f02SDimitry Andric Wrapper->setName(F->getName() + "_bitcast_invalid");
215*b5893f02SDimitry Andric } else if (!WrapperNeeded) {
216*b5893f02SDimitry Andric LLVM_DEBUG(dbgs() << "CreateWrapper: no wrapper needed: " << F->getName()
217*b5893f02SDimitry Andric << "\n");
21824e2fe98SDimitry Andric Wrapper->eraseFromParent();
21924e2fe98SDimitry Andric return nullptr;
22024e2fe98SDimitry Andric }
221*b5893f02SDimitry Andric LLVM_DEBUG(dbgs() << "CreateWrapper: " << F->getName() << "\n");
22224e2fe98SDimitry Andric return Wrapper;
22324e2fe98SDimitry Andric }
22424e2fe98SDimitry Andric
225*b5893f02SDimitry Andric // Test whether a main function with type FuncTy should be rewritten to have
226*b5893f02SDimitry Andric // type MainTy.
shouldFixMainFunction(FunctionType * FuncTy,FunctionType * MainTy)227*b5893f02SDimitry Andric bool shouldFixMainFunction(FunctionType *FuncTy, FunctionType *MainTy) {
228*b5893f02SDimitry Andric // Only fix the main function if it's the standard zero-arg form. That way,
229*b5893f02SDimitry Andric // the standard cases will work as expected, and users will see signature
230*b5893f02SDimitry Andric // mismatches from the linker for non-standard cases.
231*b5893f02SDimitry Andric return FuncTy->getReturnType() == MainTy->getReturnType() &&
232*b5893f02SDimitry Andric FuncTy->getNumParams() == 0 &&
233*b5893f02SDimitry Andric !FuncTy->isVarArg();
234*b5893f02SDimitry Andric }
235*b5893f02SDimitry Andric
runOnModule(Module & M)23624e2fe98SDimitry Andric bool FixFunctionBitcasts::runOnModule(Module &M) {
237*b5893f02SDimitry Andric LLVM_DEBUG(dbgs() << "********** Fix Function Bitcasts **********\n");
238*b5893f02SDimitry Andric
2392cab237bSDimitry Andric Function *Main = nullptr;
2402cab237bSDimitry Andric CallInst *CallMain = nullptr;
24124e2fe98SDimitry Andric SmallVector<std::pair<Use *, Function *>, 0> Uses;
242f1a29dd3SDimitry Andric SmallPtrSet<Constant *, 2> ConstantBCs;
24324e2fe98SDimitry Andric
24424e2fe98SDimitry Andric // Collect all the places that need wrappers.
2452cab237bSDimitry Andric for (Function &F : M) {
2462cab237bSDimitry Andric FindUses(&F, F, Uses, ConstantBCs);
2472cab237bSDimitry Andric
2482cab237bSDimitry Andric // If we have a "main" function, and its type isn't
2492cab237bSDimitry Andric // "int main(int argc, char *argv[])", create an artificial call with it
2502cab237bSDimitry Andric // bitcasted to that type so that we generate a wrapper for it, so that
2512cab237bSDimitry Andric // the C runtime can call it.
252*b5893f02SDimitry Andric if (F.getName() == "main") {
2532cab237bSDimitry Andric Main = &F;
2542cab237bSDimitry Andric LLVMContext &C = M.getContext();
255*b5893f02SDimitry Andric Type *MainArgTys[] = {Type::getInt32Ty(C),
256*b5893f02SDimitry Andric PointerType::get(Type::getInt8PtrTy(C), 0)};
2572cab237bSDimitry Andric FunctionType *MainTy = FunctionType::get(Type::getInt32Ty(C), MainArgTys,
2582cab237bSDimitry Andric /*isVarArg=*/false);
259*b5893f02SDimitry Andric if (shouldFixMainFunction(F.getFunctionType(), MainTy)) {
260*b5893f02SDimitry Andric LLVM_DEBUG(dbgs() << "Found `main` function with incorrect type: "
261*b5893f02SDimitry Andric << *F.getFunctionType() << "\n");
262*b5893f02SDimitry Andric Value *Args[] = {UndefValue::get(MainArgTys[0]),
263*b5893f02SDimitry Andric UndefValue::get(MainArgTys[1])};
264*b5893f02SDimitry Andric Value *Casted =
265*b5893f02SDimitry Andric ConstantExpr::getBitCast(Main, PointerType::get(MainTy, 0));
2662cab237bSDimitry Andric CallMain = CallInst::Create(Casted, Args, "call_main");
2672cab237bSDimitry Andric Use *UseMain = &CallMain->getOperandUse(2);
2682cab237bSDimitry Andric Uses.push_back(std::make_pair(UseMain, &F));
2692cab237bSDimitry Andric }
2702cab237bSDimitry Andric }
2712cab237bSDimitry Andric }
27224e2fe98SDimitry Andric
27324e2fe98SDimitry Andric DenseMap<std::pair<Function *, FunctionType *>, Function *> Wrappers;
27424e2fe98SDimitry Andric
27524e2fe98SDimitry Andric for (auto &UseFunc : Uses) {
27624e2fe98SDimitry Andric Use *U = UseFunc.first;
27724e2fe98SDimitry Andric Function *F = UseFunc.second;
27824e2fe98SDimitry Andric PointerType *PTy = cast<PointerType>(U->get()->getType());
27924e2fe98SDimitry Andric FunctionType *Ty = dyn_cast<FunctionType>(PTy->getElementType());
28024e2fe98SDimitry Andric
28124e2fe98SDimitry Andric // If the function is casted to something like i8* as a "generic pointer"
28224e2fe98SDimitry Andric // to be later casted to something else, we can't generate a wrapper for it.
28324e2fe98SDimitry Andric // Just ignore such casts for now.
28424e2fe98SDimitry Andric if (!Ty)
2857a7e6055SDimitry Andric continue;
2867a7e6055SDimitry Andric
28724e2fe98SDimitry Andric auto Pair = Wrappers.insert(std::make_pair(std::make_pair(F, Ty), nullptr));
28824e2fe98SDimitry Andric if (Pair.second)
28924e2fe98SDimitry Andric Pair.first->second = CreateWrapper(F, Ty);
29024e2fe98SDimitry Andric
29124e2fe98SDimitry Andric Function *Wrapper = Pair.first->second;
29224e2fe98SDimitry Andric if (!Wrapper)
29324e2fe98SDimitry Andric continue;
29424e2fe98SDimitry Andric
29524e2fe98SDimitry Andric if (isa<Constant>(U->get()))
29624e2fe98SDimitry Andric U->get()->replaceAllUsesWith(Wrapper);
29724e2fe98SDimitry Andric else
29824e2fe98SDimitry Andric U->set(Wrapper);
29924e2fe98SDimitry Andric }
30024e2fe98SDimitry Andric
3012cab237bSDimitry Andric // If we created a wrapper for main, rename the wrapper so that it's the
3022cab237bSDimitry Andric // one that gets called from startup.
3032cab237bSDimitry Andric if (CallMain) {
3042cab237bSDimitry Andric Main->setName("__original_main");
3052cab237bSDimitry Andric Function *MainWrapper =
3062cab237bSDimitry Andric cast<Function>(CallMain->getCalledValue()->stripPointerCasts());
307*b5893f02SDimitry Andric delete CallMain;
308*b5893f02SDimitry Andric if (Main->isDeclaration()) {
309*b5893f02SDimitry Andric // The wrapper is not needed in this case as we don't need to export
310*b5893f02SDimitry Andric // it to anyone else.
311*b5893f02SDimitry Andric MainWrapper->eraseFromParent();
312*b5893f02SDimitry Andric } else {
313*b5893f02SDimitry Andric // Otherwise give the wrapper the same linkage as the original main
314*b5893f02SDimitry Andric // function, so that it can be called from the same places.
3152cab237bSDimitry Andric MainWrapper->setName("main");
3162cab237bSDimitry Andric MainWrapper->setLinkage(Main->getLinkage());
3172cab237bSDimitry Andric MainWrapper->setVisibility(Main->getVisibility());
318*b5893f02SDimitry Andric }
3192cab237bSDimitry Andric }
3202cab237bSDimitry Andric
32124e2fe98SDimitry Andric return true;
32224e2fe98SDimitry Andric }
323