//===- GlobalSplit.cpp - global variable splitter -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass uses inrange annotations on GEP indices to split globals where
// beneficial. Clang currently attaches these annotations to references to
// virtual table globals under the Itanium ABI for the benefit of the
// whole-program virtual call optimization and control flow integrity passes.
//
//===----------------------------------------------------------------------===//
15f72a8d4eSPeter Collingbourne
162ae76dd2SDavide Italiano #include "llvm/Transforms/IPO/GlobalSplit.h"
17e9ea08a0SEugene Zelenko #include "llvm/ADT/SmallVector.h"
18f72a8d4eSPeter Collingbourne #include "llvm/ADT/StringExtras.h"
19e9ea08a0SEugene Zelenko #include "llvm/IR/Constant.h"
20f72a8d4eSPeter Collingbourne #include "llvm/IR/Constants.h"
21e9ea08a0SEugene Zelenko #include "llvm/IR/DataLayout.h"
22e9ea08a0SEugene Zelenko #include "llvm/IR/Function.h"
23e9ea08a0SEugene Zelenko #include "llvm/IR/GlobalValue.h"
24f72a8d4eSPeter Collingbourne #include "llvm/IR/GlobalVariable.h"
25f72a8d4eSPeter Collingbourne #include "llvm/IR/Intrinsics.h"
26e9ea08a0SEugene Zelenko #include "llvm/IR/LLVMContext.h"
27e9ea08a0SEugene Zelenko #include "llvm/IR/Metadata.h"
28f72a8d4eSPeter Collingbourne #include "llvm/IR/Module.h"
29f72a8d4eSPeter Collingbourne #include "llvm/IR/Operator.h"
30e9ea08a0SEugene Zelenko #include "llvm/IR/Type.h"
31e9ea08a0SEugene Zelenko #include "llvm/IR/User.h"
3205da2fe5SReid Kleckner #include "llvm/InitializePasses.h"
33f72a8d4eSPeter Collingbourne #include "llvm/Pass.h"
34e9ea08a0SEugene Zelenko #include "llvm/Support/Casting.h"
356bda14b3SChandler Carruth #include "llvm/Transforms/IPO.h"
36e9ea08a0SEugene Zelenko #include <cstdint>
37e9ea08a0SEugene Zelenko #include <vector>
38f72a8d4eSPeter Collingbourne
39f72a8d4eSPeter Collingbourne using namespace llvm;
40f72a8d4eSPeter Collingbourne
splitGlobal(GlobalVariable & GV)41e9ea08a0SEugene Zelenko static bool splitGlobal(GlobalVariable &GV) {
42f72a8d4eSPeter Collingbourne // If the address of the global is taken outside of the module, we cannot
43f72a8d4eSPeter Collingbourne // apply this transformation.
44f72a8d4eSPeter Collingbourne if (!GV.hasLocalLinkage())
45f72a8d4eSPeter Collingbourne return false;
46f72a8d4eSPeter Collingbourne
47f72a8d4eSPeter Collingbourne // We currently only know how to split ConstantStructs.
48f72a8d4eSPeter Collingbourne auto *Init = dyn_cast_or_null<ConstantStruct>(GV.getInitializer());
49f72a8d4eSPeter Collingbourne if (!Init)
50f72a8d4eSPeter Collingbourne return false;
51f72a8d4eSPeter Collingbourne
52f72a8d4eSPeter Collingbourne // Verify that each user of the global is an inrange getelementptr constant.
53f72a8d4eSPeter Collingbourne // From this it follows that any loads from or stores to that global must use
54f72a8d4eSPeter Collingbourne // a pointer derived from an inrange getelementptr constant, which is
55f72a8d4eSPeter Collingbourne // sufficient to allow us to apply the splitting transform.
56f72a8d4eSPeter Collingbourne for (User *U : GV.users()) {
57f72a8d4eSPeter Collingbourne if (!isa<Constant>(U))
58f72a8d4eSPeter Collingbourne return false;
59f72a8d4eSPeter Collingbourne
60f72a8d4eSPeter Collingbourne auto *GEP = dyn_cast<GEPOperator>(U);
61f72a8d4eSPeter Collingbourne if (!GEP || !GEP->getInRangeIndex() || *GEP->getInRangeIndex() != 1 ||
62f72a8d4eSPeter Collingbourne !isa<ConstantInt>(GEP->getOperand(1)) ||
63f72a8d4eSPeter Collingbourne !cast<ConstantInt>(GEP->getOperand(1))->isZero() ||
64f72a8d4eSPeter Collingbourne !isa<ConstantInt>(GEP->getOperand(2)))
65f72a8d4eSPeter Collingbourne return false;
66f72a8d4eSPeter Collingbourne }
67f72a8d4eSPeter Collingbourne
68f72a8d4eSPeter Collingbourne SmallVector<MDNode *, 2> Types;
69f72a8d4eSPeter Collingbourne GV.getMetadata(LLVMContext::MD_type, Types);
70f72a8d4eSPeter Collingbourne
71f72a8d4eSPeter Collingbourne const DataLayout &DL = GV.getParent()->getDataLayout();
72f72a8d4eSPeter Collingbourne const StructLayout *SL = DL.getStructLayout(Init->getType());
73f72a8d4eSPeter Collingbourne
74f72a8d4eSPeter Collingbourne IntegerType *Int32Ty = Type::getInt32Ty(GV.getContext());
75f72a8d4eSPeter Collingbourne
76f72a8d4eSPeter Collingbourne std::vector<GlobalVariable *> SplitGlobals(Init->getNumOperands());
77f72a8d4eSPeter Collingbourne for (unsigned I = 0; I != Init->getNumOperands(); ++I) {
78f72a8d4eSPeter Collingbourne // Build a global representing this split piece.
79f72a8d4eSPeter Collingbourne auto *SplitGV =
80f72a8d4eSPeter Collingbourne new GlobalVariable(*GV.getParent(), Init->getOperand(I)->getType(),
81f72a8d4eSPeter Collingbourne GV.isConstant(), GlobalValue::PrivateLinkage,
82f72a8d4eSPeter Collingbourne Init->getOperand(I), GV.getName() + "." + utostr(I));
83f72a8d4eSPeter Collingbourne SplitGlobals[I] = SplitGV;
84f72a8d4eSPeter Collingbourne
85f72a8d4eSPeter Collingbourne unsigned SplitBegin = SL->getElementOffset(I);
86f72a8d4eSPeter Collingbourne unsigned SplitEnd = (I == Init->getNumOperands() - 1)
87f72a8d4eSPeter Collingbourne ? SL->getSizeInBytes()
88f72a8d4eSPeter Collingbourne : SL->getElementOffset(I + 1);
89f72a8d4eSPeter Collingbourne
90f72a8d4eSPeter Collingbourne // Rebuild type metadata, adjusting by the split offset.
91f72a8d4eSPeter Collingbourne // FIXME: See if we can use DW_OP_piece to preserve debug metadata here.
92f72a8d4eSPeter Collingbourne for (MDNode *Type : Types) {
93f72a8d4eSPeter Collingbourne uint64_t ByteOffset = cast<ConstantInt>(
94f72a8d4eSPeter Collingbourne cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
95f72a8d4eSPeter Collingbourne ->getZExtValue();
967a5cfa9aSEvgeniy Stepanov // Type metadata may be attached one byte after the end of the vtable, for
977a5cfa9aSEvgeniy Stepanov // classes without virtual methods in Itanium ABI. AFAIK, it is never
987a5cfa9aSEvgeniy Stepanov // attached to the first byte of a vtable. Subtract one to get the right
997a5cfa9aSEvgeniy Stepanov // slice.
1007a5cfa9aSEvgeniy Stepanov // This is making an assumption that vtable groups are the only kinds of
1017a5cfa9aSEvgeniy Stepanov // global variables that !type metadata can be attached to, and that they
1027a5cfa9aSEvgeniy Stepanov // are either Itanium ABI vtable groups or contain a single vtable (i.e.
1037a5cfa9aSEvgeniy Stepanov // Microsoft ABI vtables).
1047a5cfa9aSEvgeniy Stepanov uint64_t AttachedTo = (ByteOffset == 0) ? ByteOffset : ByteOffset - 1;
1057a5cfa9aSEvgeniy Stepanov if (AttachedTo < SplitBegin || AttachedTo >= SplitEnd)
106f72a8d4eSPeter Collingbourne continue;
107f72a8d4eSPeter Collingbourne SplitGV->addMetadata(
108f72a8d4eSPeter Collingbourne LLVMContext::MD_type,
109f72a8d4eSPeter Collingbourne *MDNode::get(GV.getContext(),
110f72a8d4eSPeter Collingbourne {ConstantAsMetadata::get(
111f72a8d4eSPeter Collingbourne ConstantInt::get(Int32Ty, ByteOffset - SplitBegin)),
112f72a8d4eSPeter Collingbourne Type->getOperand(1)}));
113f72a8d4eSPeter Collingbourne }
114458676dbSTeresa Johnson
115458676dbSTeresa Johnson if (GV.hasMetadata(LLVMContext::MD_vcall_visibility))
116458676dbSTeresa Johnson SplitGV->setVCallVisibilityMetadata(GV.getVCallVisibility());
117f72a8d4eSPeter Collingbourne }
118f72a8d4eSPeter Collingbourne
119f72a8d4eSPeter Collingbourne for (User *U : GV.users()) {
120f72a8d4eSPeter Collingbourne auto *GEP = cast<GEPOperator>(U);
121f72a8d4eSPeter Collingbourne unsigned I = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
122f72a8d4eSPeter Collingbourne if (I >= SplitGlobals.size())
123f72a8d4eSPeter Collingbourne continue;
124f72a8d4eSPeter Collingbourne
125f72a8d4eSPeter Collingbourne SmallVector<Value *, 4> Ops;
126f72a8d4eSPeter Collingbourne Ops.push_back(ConstantInt::get(Int32Ty, 0));
127f72a8d4eSPeter Collingbourne for (unsigned I = 3; I != GEP->getNumOperands(); ++I)
128f72a8d4eSPeter Collingbourne Ops.push_back(GEP->getOperand(I));
129f72a8d4eSPeter Collingbourne
130f72a8d4eSPeter Collingbourne auto *NewGEP = ConstantExpr::getGetElementPtr(
131f72a8d4eSPeter Collingbourne SplitGlobals[I]->getInitializer()->getType(), SplitGlobals[I], Ops,
132f72a8d4eSPeter Collingbourne GEP->isInBounds());
133f72a8d4eSPeter Collingbourne GEP->replaceAllUsesWith(NewGEP);
134f72a8d4eSPeter Collingbourne }
135f72a8d4eSPeter Collingbourne
136f72a8d4eSPeter Collingbourne // Finally, remove the original global. Any remaining uses refer to invalid
137*373571dbSNuno Lopes // elements of the global, so replace with poison.
138f72a8d4eSPeter Collingbourne if (!GV.use_empty())
139*373571dbSNuno Lopes GV.replaceAllUsesWith(PoisonValue::get(GV.getType()));
140f72a8d4eSPeter Collingbourne GV.eraseFromParent();
141f72a8d4eSPeter Collingbourne return true;
142f72a8d4eSPeter Collingbourne }
143f72a8d4eSPeter Collingbourne
splitGlobals(Module & M)144e9ea08a0SEugene Zelenko static bool splitGlobals(Module &M) {
145f72a8d4eSPeter Collingbourne // First, see if the module uses either of the llvm.type.test or
146f72a8d4eSPeter Collingbourne // llvm.type.checked.load intrinsics, which indicates that splitting globals
147f72a8d4eSPeter Collingbourne // may be beneficial.
148f72a8d4eSPeter Collingbourne Function *TypeTestFunc =
149f72a8d4eSPeter Collingbourne M.getFunction(Intrinsic::getName(Intrinsic::type_test));
150f72a8d4eSPeter Collingbourne Function *TypeCheckedLoadFunc =
151f72a8d4eSPeter Collingbourne M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load));
152f72a8d4eSPeter Collingbourne if ((!TypeTestFunc || TypeTestFunc->use_empty()) &&
153f72a8d4eSPeter Collingbourne (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()))
154f72a8d4eSPeter Collingbourne return false;
155f72a8d4eSPeter Collingbourne
156f72a8d4eSPeter Collingbourne bool Changed = false;
1571b108ab9SKazu Hirata for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals()))
158f72a8d4eSPeter Collingbourne Changed |= splitGlobal(GV);
159f72a8d4eSPeter Collingbourne return Changed;
160f72a8d4eSPeter Collingbourne }
161f72a8d4eSPeter Collingbourne
162e9ea08a0SEugene Zelenko namespace {
163e9ea08a0SEugene Zelenko
164f72a8d4eSPeter Collingbourne struct GlobalSplit : public ModulePass {
165f72a8d4eSPeter Collingbourne static char ID;
166e9ea08a0SEugene Zelenko
GlobalSplit__anonbb1fbb120111::GlobalSplit167f72a8d4eSPeter Collingbourne GlobalSplit() : ModulePass(ID) {
168f72a8d4eSPeter Collingbourne initializeGlobalSplitPass(*PassRegistry::getPassRegistry());
169f72a8d4eSPeter Collingbourne }
170e9ea08a0SEugene Zelenko
runOnModule__anonbb1fbb120111::GlobalSplit171e9ea08a0SEugene Zelenko bool runOnModule(Module &M) override {
172f72a8d4eSPeter Collingbourne if (skipModule(M))
173f72a8d4eSPeter Collingbourne return false;
174f72a8d4eSPeter Collingbourne
175f72a8d4eSPeter Collingbourne return splitGlobals(M);
176f72a8d4eSPeter Collingbourne }
177f72a8d4eSPeter Collingbourne };
178f72a8d4eSPeter Collingbourne
179e9ea08a0SEugene Zelenko } // end anonymous namespace
180e9ea08a0SEugene Zelenko
181e9ea08a0SEugene Zelenko char GlobalSplit::ID = 0;
182f72a8d4eSPeter Collingbourne
183f72a8d4eSPeter Collingbourne INITIALIZE_PASS(GlobalSplit, "globalsplit", "Global splitter", false, false)
184f72a8d4eSPeter Collingbourne
createGlobalSplitPass()185f72a8d4eSPeter Collingbourne ModulePass *llvm::createGlobalSplitPass() {
186f72a8d4eSPeter Collingbourne return new GlobalSplit;
187f72a8d4eSPeter Collingbourne }
1882ae76dd2SDavide Italiano
run(Module & M,ModuleAnalysisManager & AM)1892ae76dd2SDavide Italiano PreservedAnalyses GlobalSplitPass::run(Module &M, ModuleAnalysisManager &AM) {
1902ae76dd2SDavide Italiano if (!splitGlobals(M))
1912ae76dd2SDavide Italiano return PreservedAnalyses::all();
1922ae76dd2SDavide Italiano return PreservedAnalyses::none();
1932ae76dd2SDavide Italiano }
194